bold/ 0000755 0001762 0000144 00000000000 13134504041 011165 5 ustar ligges users bold/inst/ 0000755 0001762 0000144 00000000000 13134420302 012137 5 ustar ligges users bold/inst/doc/ 0000755 0001762 0000144 00000000000 13134420302 012704 5 ustar ligges users bold/inst/doc/bold_vignette.html 0000644 0001762 0000144 00000106764 13134420302 016435 0 ustar ligges users
bold info
bold
is an R package to connect to BOLD Systems via their API. Functions in bold
let you search for sequence data, specimen data, sequence + specimen data, and download raw trace files.
bold info
Using bold
Install
Install bold
from CRAN
install.packages("bold")
Or install the development version from GitHub
devtools::install_github("ropensci/bold")
Load the package
library("bold")
Search for taxonomic names via names
bold_tax_name
searches for taxa by taxonomic name.
bold_tax_name(name = 'Diplura')
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 Diplura 591238 Diplura order Animals 82 Insecta
#> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae
#> taxonrep
#> 1 Diplura
#> 2 <NA>
bold_tax_name(name = c('Diplura', 'Osmia'))
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 Diplura 591238 Diplura order Animals 82 Insecta
#> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae
#> 3 Osmia 4940 Osmia genus Animals 4962 Megachilinae
#> taxonrep
#> 1 Diplura
#> 2 <NA>
#> 3 Osmia
Search for taxonomic names via BOLD identifiers
bold_tax_id
searches for names with BOLD identifiers.
bold_tax_id(id = 88899)
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus genus Animals 88898 Momotidae
bold_tax_id(id = c(88899, 125295))
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus genus Animals 88898 Momotidae
#> 2 125295 125295 Helianthus genus Plants 100962 Asteraceae
Search for sequence data only
The BOLD sequence API gives back sequence data, with a bit of metadata.
The default is to get a list back
bold_seq(taxon = 'Coelioxys')[1:2]
#> [[1]]
#> [[1]]$id
#> [1] "FBAPB491-09"
#>
#> [[1]]$name
#> [1] "Coelioxys conica"
#>
#> [[1]]$gene
#> [1] "FBAPB491-09"
#>
#> [[1]]$sequence
#> [1] "---------------------ACCTCTTTAAGAATAATTATTCGTATAGAAATAAGAATTCCAGGATCTTGAATTAATAATGATCAAATTTATAACTCCTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATACCTTTTCTTATTGGAGGATTTGGAAATTGATTAGTACCTTTAATATTAGGATCACCAGATATAGCTTTCCCACGAATAAATAATATTAGATTTTGATTATTACCTCCTTCTTTATTAATATTATTATTAAGTAATTTAATAAATCCCAGACCAGGAACAGGCTGAACAGTTTATCCTCCTTTATCTTTATACACATACCACCCTTCTCCCTCAGTTGATTTAGCAATTTTTTCACTACATCTATCAGGAATCTCTTCTATTATTGGATCTATAAATTTTATTGTTACAATTTTAATAATAAAAAACTTTTCAATAAATTATAATCAAATACCATTATTCCCATGATCTATTTTAATTACTACTATTTTATTATTATTATCACTACCTGTATTAGCTGGTGCTATTACTATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCTATAGGAGGAGGAGACCCAATTTTATACCAACATTTA"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "FBAPC351-10"
#>
#> [[2]]$name
#> [1] "Coelioxys afra"
#>
#> [[2]]$gene
#> [1] "FBAPC351-10"
#>
#> [[2]]$sequence
#> [1] "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGAGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCATTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
You can optionally get back the httr
response object
res <- bold_seq(taxon = 'Coelioxys', response = TRUE)
res$headers
#> $date
#> [1] "Tue, 15 Sep 2015 20:02:31 GMT"
#>
#> $server
#> [1] "Apache/2.2.15 (Red Hat)"
#>
#> $`x-powered-by`
#> [1] "PHP/5.3.15"
#>
#> $`content-disposition`
#> [1] "attachment; filename=fasta.fas"
#>
#> $connection
#> [1] "close"
#>
#> $`transfer-encoding`
#> [1] "chunked"
#>
#> $`content-type`
#> [1] "application/x-download"
#>
#> attr(,"class")
#> [1] "insensitive" "list"
You can do geographic searches
bold_seq(geo = "USA")
#> [[1]]
#> [[1]]$id
#> [1] "GBAN1777-08"
#>
#> [[1]]$name
#> [1] "Macrobdella decora"
#>
#> [[1]]$gene
#> [1] "GBAN1777-08"
#>
#> [[1]]$sequence
#> [1] "---------------------------------ATTGGAATCTTGTATTTCTTATTAGGTACATGATCTGCTATAGTAGGGACCTCTATA---AGAATAATTATTCGAATTGAATTAGCTCAACCTGGGTCGTTTTTAGGAAAT---GATCAAATTTACAATACTATTGTTACTGCTCATGGATTAATTATAATTTTTTTTATAGTAATACCTATTTTAATTGGAGGGTTTGGTAATTGATTAATTCCGCTAATA---ATTGGTTCTCCTGATATAGCTTTTCCACGTCTTAATAATTTAAGATTTTGATTACTTCCGCCATCTTTAACTATACTTTTTTGTTCATCTATAGTCGAAAATGGAGTAGGTACTGGATGGACTATTTACCCTCCTTTAGCAGATAACATTGCTCATTCTGGACCTTCTGTAGATATA---GCAATTTTTTCACTTCATTTAGCTGGTGCTTCTTCTATTTTAGGTTCATTAAATTTTATTACTACTGTAGTTAATATACGATGACCAGGGATATCTATAGAGCGAATTCCTTTATTTATTTGATCCGTAATTATTACTACTGTATTGCTATTATTATCTTTACCAGTATTAGCAGCT---GCTATTTCAATATTATTAACAGATCGTAACTTAAATACTAGATTTTTTGACCCAATAGGAGGAGGGGATCCTATTTTATTCCAACATTTATTTTGATTTTTTGGCCACCCTGAAGTTTATATTTTAATTTTACCAGGATTTGGAGCTATTTCTCATGTAGTAAGTCATAACTCT---AAAAAATTAGAACCGTTTGGATCATTAGGGATATTATATGCAATAATTGGAATTGCAATTTTAGGTTTTATTGTTTGAGCACATCATATATTTACAGTAGGTCTTGATGTAGATACACGAGCTTATTTTACAGCAGCTACAATAGTTATTGCTGTTCCTACAGGAATTAAAGTATTTAGGTGATTG---GCAACT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "GBAN1780-08"
#>
#> [[2]]$name
#> [1] "Haemopis terrestris"
#>
#> [[2]]$gene
#> [1] "GBAN1780-08"
#>
#> [[2]]$sequence
#> [1] "---------------------------------ATTGGAACWTTWTATTTTATTTTNGGNGCTTGATCTGCTATATTNGGGATCTCAATA---AGGAATATTATTCGAATTGAGCCATCTCAACCTGGGAGATTATTAGGAAAT---GATCAATTATATAATTCATTAGTAACAGCTCATGGATTAATTATAATTTTCTTTATGGTTATGCCTATTTTGATTGGTGGGTTTGGTAATTGATTACTACCTTTAATA---ATTGGAGCCCCTGATATAGCTTTTCCTCGATTAAATAATTTAAGTTTTTGATTATTACCACCTTCATTAATTATATTGTTAAGATCCTCTATTATTGAAAGAGGGGTAGGTACAGGTTGAACCTTATATCCTCCTTTAGCAGATAGATTATTTCATTCAGGTCCATCGGTAGATATA---GCTATTTTTTCATTACATATAGCTGGAGCATCATCTATTTTAGGCTCATTAAACTTTATTTCTACAATTATTAATATACGAATTAAAGGTATAAGATCTGATCGAGTACCTTTATTTGTATGATCAGTTGTTATTACAACAGTTCTGTTATTATTGTCTTTACCTGTTTTAGCTGCA---GCTATTACTATATTATTAACAGATCGTAATTTAAATACTACTTTTTTTGATCCTATAGGAGGTGGAGATCCAGTATTGTTTCAACACTTATTTTGATTTTTTGGTCATCCAGAAGTATATATTTTGATTTTACCAGGATTTGGAGCAATTTCTCATATTATTACAAATAATTCT---AAAAAATTGGAACCTTTTGGATCTCTTGGTATAATTTATGCTATAATTGGAATTGCAGTTTTAGGGTTTATTGTATGAGCCCATCATATATTTACTGTAGGATTAGATGTTGATACTCGAGCTTATTTTACAGCAGCTACTATAGTTATTGCTGTTCCTACTGGTATTAAAGTTTTTAGGTGATTA---GCAACA"
#>
#>
#> [[3]]
#> [[3]]$id
#> [1] "GBNM0293-06"
#>
#> [[3]]$name
#> [1] "Steinernema carpocapsae"
#>
#> [[3]]$gene
#> [1] "GBNM0293-06"
#>
#> [[3]]$sequence
#> [1] "---------------------------------------------------------------------------------ACAAGATTATCTCTTATTATTCGTTTAGAGTTGGCTCAACCTGGTCTTCTTTTGGGTAAT---GGTCAATTATATAATTCTATTATTACTGCTCATGCTATTCTTATAATTTTTTTCATAGTTATACCTAGAATAATTGGTGGTTTTGGTAATTGAATATTACCTTTAATATTGGGGGCTCCTGATATAAGTTTTCCACGTTTGAATAATTTAAGTTTTTGATTGCTACCAACTGCTATATTTTTGATTTTAGATTCTTGTTTTGTTGACACTGGTTGTGGTACTAGTTGAACTGTTTATCCTCCTTTGAGG---ACTTTAGGTCACCCTGGYAGAAGTGTAGATTTAGCTATTTTTAGTCTTCATTGTGCAGGAATTAGCTCAATTTTAGGGGCTATTAATTTTATATGTACTACAAAAAATCTTCGTAGTAGTTCTATTTCTTTGGAACATATAAGACTTTTTGTTTGGGCTGTTTTTGTTACTGTTTTTTTATTAGTTTTATCTTTACCTGTTTTAGCTGGTGCTATTACTATGCTTTTAACAGACCGTAATTTAAATACTTCTTTTTTT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
#>
#>
#> [[4]]
#> [[4]]$id
#> [1] "NEONV108-11"
#>
#> [[4]]$name
#> [1] "Aedes thelcter"
#>
#> [[4]]$gene
#> [1] "NEONV108-11"
#>
#> [[4]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGATCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGATCCAATTGGAGGAGGAGACCCAATTTTATACCAACATTTATTT"
#>
#>
#> [[5]]
#> [[5]]$id
#> [1] "NEONV109-11"
#>
#> [[5]]$name
#> [1] "Aedes thelcter"
#>
#> [[5]]$gene
#> [1] "NEONV109-11"
#>
#> [[5]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGGTCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGACCCAATTGGAGGGGGAGACCCAATTTTATACCAACATTTATTT"
And you can search by researcher name
bold_seq(researchers = 'Thibaud Decaens')[[1]]
#> $id
#> [1] "BGABA657-14"
#>
#> $name
#> [1] "Coleoptera"
#>
#> $gene
#> [1] "BGABA657-14"
#>
#> $sequence
#> [1] "ACACTCTATTTCATTTTCGGAGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATACTAATTCGATCTGAATTGGGAAACCCCGGCTCATTGATTGGGGATGATCAAATTTATAATGTTATTGTAACAGCCCATGCATTCATTATAATTTTTTTTATAGTAATACCGATCATAATAGGAGGTTTTGGAAATTGATTAGTCCCGCTAATATTAGGTGCCCCAGATATAGCATTTCCTCGAATAAATAATATAAGATTTTGACTTCTTCCGCCTTCATTAACTTTACTTATTATAAGAAGAATTGTAGAAAACGGGGCGGGAACAGGATGAACAGTTTACCCACCCCTCTCTTCTAACATTGCTCATAGAGGAGCCTCTGTAGATCTTGCAATTTTTAGATTACATTTAGCCGGTGTATCATCAATTTTAGGTGCAGTTAATTTTATTACAACTATTATTAATATACGACCTAAAGGAATAACATTTGATCGCATACCTTTATTTGTATGAGCTGTAGCTTTAACTGCATTACTTTTATTATTATCTTTACCAGTATTAGCAGGTGCAATTACAATACTTTTAACTGATCGA---------------------------------------"
by record IDs (for example, process IDs)
bold_seq(ids = c('ACRJP618-11', 'ACRJP619-11'))
#> [[1]]
#> [[1]]$id
#> [1] "ACRJP618-11"
#>
#> [[1]]$name
#> [1] "Lepidoptera"
#>
#> [[1]]$gene
#> [1] "ACRJP618-11"
#>
#> [[1]]$sequence
#> [1] "------------------------TTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "ACRJP619-11"
#>
#> [[2]]$name
#> [1] "Lepidoptera"
#>
#> [[2]]$gene
#> [1] "ACRJP619-11"
#>
#> [[2]]$sequence
#> [1] "AACTTTATATTTTATTTTTGGTATTTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
by container (containers include project codes and dataset codes)
bold_seq(container = 'ACRJP')[[1]]
#> $id
#> [1] "ACRJP003-09"
#>
#> $name
#> [1] "Lepidoptera"
#>
#> $gene
#> [1] "ACRJP003-09"
#>
#> $sequence
#> [1] "AACATTATATTTTATTTTTGGGATCTGATCTGGAATAGTAGGGACATCTTTAAGTATACTAATTCGAATAGAACTAGGAAATCCTGGATGTTTAATTGGGGATGATCAAATTTATAATACTATTGTTACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCCATTATAATTGGAGGTTTTGGCAATTGACTTGTACCATTAATATTAGGAGCCCCTGATATAGCATTTCCCCGAATAAATAATATAAGATTTTGACTTCTTCCCCCCTCATTAATTTTATTAATTTCAAGAAGAATTGTTGAAAATGGAGCAGGAACAGGATGAACAGTCTATCCTCCATTATCTTCTAATATTGCGCATAGAGGATCCTCTGTTGATTTAGCTATTTTCTCACTTCATTTAGCAGGAATTTCTTCTATTTTAGGAGCAATTAATTTTATTACAACTATTATTAATATACGAATAAATAATTTACTTTTTGACCAAATACCTCTATTTGTTTGAGCAGTAGGTATTACAGCTGTTCTTCTTTTATTATCATTACCAGTATTAGCAGGAGCAATTACCATACTATTAACAGATCGTAATTTAAATACTTCTTTCTTTGATCCTGCTGGAGGAGGAGATCCAATTTTATACCAACATTTATTT"
by bin (a bin is a Barcode Index Number)
bold_seq(bin = 'BOLD:AAA5125')[[1]]
#> $id
#> [1] "BLPAB406-06"
#>
#> $name
#> [1] "Eacles ormondei"
#>
#> $gene
#> [1] "BLPAB406-06"
#>
#> $sequence
#> [1] "AACTTTATATTTTATTTTTGGAATTTGAGCAGGTATAGTAGGAACTTCTTTAAGATTACTAATTCGAGCAGAATTAGGTACCCCCGGATCTTTAATTGGAGATGACCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGATTAGTACCCCTAATACTAGGAGCTCCTGATATAGCTTTCCCCCGAATAAATAATATAAGATTTTGACTATTACCCCCATCTTTAACTCTTTTAATTTCTAGAAGAATTGTCGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCCCTTTCATCTAATATTGCTCATGGAGGCTCTTCTGTTGATTTAGCTATTTTTTCCCTTCATCTAGCTGGAATCTCATCAATTTTAGGAGCTATTAATTTTATCACAACAATCATTAATATACGACTAAATAATATAATATTTGACCAAATACCTTTATTTGTATGAGCTGTTGGTATTACAGCATTTCTTTTATTGTTATCTTTACCTGTACTAGCTGGAGCTATTACTATACTTTTAACAGATCGAAACTTAAATACATCATTTTTTGACCCAGCAGGAGGAGGAGATCCTATTCTCTATCAACATTTATTT"
There are more ways to query; check out the docs with ?bold_seq
.
Search for specimen data only
The BOLD specimen API doesn't give back sequences, only specimen data. By default you download tsv
format data, which is given back to you as a data.frame
res <- bold_specimens(taxon = 'Osmia')
head(res[,1:8])
#> processid sampleid recordID catalognum fieldnum
#> 1 ASGCB261-13 BIOUG07489-F10 3955538 BIOUG07489-F10
#> 2 BCHYM1499-13 BC ZSM HYM 19359 4005348 BC ZSM HYM 19359 BC ZSM HYM 19359
#> 3 BCHYM412-13 BC ZSM HYM 18272 3896353 BC ZSM HYM 18272 BC ZSM HYM 18272
#> 4 BCHYM413-13 BC ZSM HYM 18273 3896354 BC ZSM HYM 18273 BC ZSM HYM 18273
#> 5 FBAPB706-09 BC ZSM HYM 02181 1289067 BC ZSM HYM 02181 BC ZSM HYM 02181
#> 6 FBAPB730-09 BC ZSM HYM 02205 1289091 BC ZSM HYM 02205 BC ZSM HYM 02205
#> institution_storing bin_uri phylum_taxID
#> 1 Biodiversity Institute of Ontario BOLD:AAB8874 20
#> 2 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAD6282 20
#> 3 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20
#> 4 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20
#> 5 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAE4126 20
#> 6 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAK5820 20
You can optionally get back the data in XML
format
bold_specimens(taxon = 'Osmia', format = 'xml')
<?xml version="1.0" encoding="UTF-8"?>
<bold_records xsi:noNamespaceSchemaLocation="http://www.boldsystems.org/schemas/BOLDPublic_record.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<record>
<record_id>1470124</record_id>
<processid>BOM1525-10</processid>
<bin_uri>BOLD:AAN3337</bin_uri>
<specimen_identifiers>
<sampleid>DHB 1011</sampleid>
<catalognum>DHB 1011</catalognum>
<fieldnum>DHB1011</fieldnum>
<institution_storing>Marjorie Barrick Museum</institution_storing>
</specimen_identifiers>
<taxonomy>
You can choose to get the httr
response object back if you'd rather work with the raw data returned from the BOLD API.
res <- bold_specimens(taxon = 'Osmia', format = 'xml', response = TRUE)
res$url
#> [1] "http://v4.boldsystems.org/index.php/API_Public/specimen?taxon=Osmia&format=xml"
res$status_code
#> [1] 200
res$headers
#> NULL
Search for specimen plus sequence data
The specimen/sequence combined API gives back specimen and sequence data. Like the specimen API, by default this one gives back tsv
format data, which is given back to you as a data.frame
. Here, we're setting sepfasta=TRUE
so that the sequence data is given back as a list, and taken out of the data.frame
returned so the data.frame
is more manageable.
res <- bold_seqspec(taxon = 'Osmia', sepfasta = TRUE)
res$fasta[1:2]
#> $`ASGCB261-13`
#> [1] "AATTTTATATATAATTTTTGCTATATGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCTGGTTCATGAATTTCAAATGATCAAACTTATAATTCTTTAGTTACTGCTCATGCTTTTTTAATAATTTTTTTCTTAGTTATACCATTCTTAATTGGGGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATACTTTTATTATTAAGAAATTTTATAAATCCTAGTCCAGGAACTGGATGAACTGTTTATCCACCTTTATCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAACATATTCAATTACCTTTATTTCCTTGATCTGTCTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCAGGTGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCTACAGGAGGAGGAGATCCTATTCTTTATCAACATTTATTT"
#>
#> $`BCHYM1499-13`
#> [1] "AATTCTTTACATAATTTTTGCTTTATGATCTGGAATAATTGGGTCAGCAATAAGAATTATTATTCGAATAGAATTAAGTATCCCAGGTTCATGAATTACTAATGATCAAATTTATAATTCTTTAGTAACTGCACATGCTTTTTTAATAATTTTTTTTCTTGTGATACCATTTTTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCTTTCCCACGAATAAACAATATTAGATTTTGATTATTACCGCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCCCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTGCCTTTATTTCCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTGTTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCTACAGGAGGAGGAGATCCAATTCTTTATCAACATTTATTT"
Or you can extract a specific sequence by name, like this
res$fasta['GBAH0293-06']
#> $`GBAH0293-06`
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
Get trace files
This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information.
bold_trace(taxon = 'Osmia', quiet = TRUE)
bold/inst/doc/bold_vignette.Rmd 0000644 0001762 0000144 00000047747 13134226657 016241 0 ustar ligges users
`bold` is an R package to connect to [BOLD Systems](http://www.boldsystems.org/) via their API. Functions in `bold` let you search for sequence data, specimen data, sequence + specimen data, and download raw trace files.
### bold info
+ [BOLD home page](http://boldsystems.org/)
+ [BOLD API docs](http://v4.boldsystems.org/index.php/api_home)
### Using bold
**Install**
Install `bold` from CRAN
```r
install.packages("bold")
```
Or install the development version from GitHub
```r
devtools::install_github("ropensci/bold")
```
Load the package
```r
library("bold")
```
### Search for taxonomic names via names
`bold_tax_name` searches for taxa by taxonomic name.
```r
bold_tax_name(name = 'Diplura')
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 Diplura 591238 Diplura order Animals 82 Insecta
#> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae
#> taxonrep
#> 1 Diplura
#> 2 <NA>
```
```r
bold_tax_name(name = c('Diplura', 'Osmia'))
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 Diplura 591238 Diplura order Animals 82 Insecta
#> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae
#> 3 Osmia 4940 Osmia genus Animals 4962 Megachilinae
#> taxonrep
#> 1 Diplura
#> 2 <NA>
#> 3 Osmia
```
### Search for taxonomic names via BOLD identifiers
`bold_tax_id` searches for names with BOLD identifiers.
```r
bold_tax_id(id = 88899)
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus genus Animals 88898 Momotidae
```
```r
bold_tax_id(id = c(88899, 125295))
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus genus Animals 88898 Momotidae
#> 2 125295 125295 Helianthus genus Plants 100962 Asteraceae
```
### Search for sequence data only
The BOLD sequence API gives back sequence data, with a bit of metadata.
The default is to get a list back
```r
bold_seq(taxon = 'Coelioxys')[1:2]
#> [[1]]
#> [[1]]$id
#> [1] "FBAPB491-09"
#>
#> [[1]]$name
#> [1] "Coelioxys conica"
#>
#> [[1]]$gene
#> [1] "FBAPB491-09"
#>
#> [[1]]$sequence
#> [1] "---------------------ACCTCTTTAAGAATAATTATTCGTATAGAAATAAGAATTCCAGGATCTTGAATTAATAATGATCAAATTTATAACTCCTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATACCTTTTCTTATTGGAGGATTTGGAAATTGATTAGTACCTTTAATATTAGGATCACCAGATATAGCTTTCCCACGAATAAATAATATTAGATTTTGATTATTACCTCCTTCTTTATTAATATTATTATTAAGTAATTTAATAAATCCCAGACCAGGAACAGGCTGAACAGTTTATCCTCCTTTATCTTTATACACATACCACCCTTCTCCCTCAGTTGATTTAGCAATTTTTTCACTACATCTATCAGGAATCTCTTCTATTATTGGATCTATAAATTTTATTGTTACAATTTTAATAATAAAAAACTTTTCAATAAATTATAATCAAATACCATTATTCCCATGATCTATTTTAATTACTACTATTTTATTATTATTATCACTACCTGTATTAGCTGGTGCTATTACTATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCTATAGGAGGAGGAGACCCAATTTTATACCAACATTTA"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "FBAPC351-10"
#>
#> [[2]]$name
#> [1] "Coelioxys afra"
#>
#> [[2]]$gene
#> [1] "FBAPC351-10"
#>
#> [[2]]$sequence
#> [1] "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGAGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCATTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
```
You can optionally get back the `httr` response object
```r
res <- bold_seq(taxon = 'Coelioxys', response = TRUE)
res$headers
#> $date
#> [1] "Tue, 15 Sep 2015 20:02:31 GMT"
#>
#> $server
#> [1] "Apache/2.2.15 (Red Hat)"
#>
#> $`x-powered-by`
#> [1] "PHP/5.3.15"
#>
#> $`content-disposition`
#> [1] "attachment; filename=fasta.fas"
#>
#> $connection
#> [1] "close"
#>
#> $`transfer-encoding`
#> [1] "chunked"
#>
#> $`content-type`
#> [1] "application/x-download"
#>
#> attr(,"class")
#> [1] "insensitive" "list"
```
You can do geographic searches
```r
bold_seq(geo = "USA")
#> [[1]]
#> [[1]]$id
#> [1] "GBAN1777-08"
#>
#> [[1]]$name
#> [1] "Macrobdella decora"
#>
#> [[1]]$gene
#> [1] "GBAN1777-08"
#>
#> [[1]]$sequence
#> [1] "---------------------------------ATTGGAATCTTGTATTTCTTATTAGGTACATGATCTGCTATAGTAGGGACCTCTATA---AGAATAATTATTCGAATTGAATTAGCTCAACCTGGGTCGTTTTTAGGAAAT---GATCAAATTTACAATACTATTGTTACTGCTCATGGATTAATTATAATTTTTTTTATAGTAATACCTATTTTAATTGGAGGGTTTGGTAATTGATTAATTCCGCTAATA---ATTGGTTCTCCTGATATAGCTTTTCCACGTCTTAATAATTTAAGATTTTGATTACTTCCGCCATCTTTAACTATACTTTTTTGTTCATCTATAGTCGAAAATGGAGTAGGTACTGGATGGACTATTTACCCTCCTTTAGCAGATAACATTGCTCATTCTGGACCTTCTGTAGATATA---GCAATTTTTTCACTTCATTTAGCTGGTGCTTCTTCTATTTTAGGTTCATTAAATTTTATTACTACTGTAGTTAATATACGATGACCAGGGATATCTATAGAGCGAATTCCTTTATTTATTTGATCCGTAATTATTACTACTGTATTGCTATTATTATCTTTACCAGTATTAGCAGCT---GCTATTTCAATATTATTAACAGATCGTAACTTAAATACTAGATTTTTTGACCCAATAGGAGGAGGGGATCCTATTTTATTCCAACATTTATTTTGATTTTTTGGCCACCCTGAAGTTTATATTTTAATTTTACCAGGATTTGGAGCTATTTCTCATGTAGTAAGTCATAACTCT---AAAAAATTAGAACCGTTTGGATCATTAGGGATATTATATGCAATAATTGGAATTGCAATTTTAGGTTTTATTGTTTGAGCACATCATATATTTACAGTAGGTCTTGATGTAGATACACGAGCTTATTTTACAGCAGCTACAATAGTTATTGCTGTTCCTACAGGAATTAAAGTATTTAGGTGATTG---GCAACT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "GBAN1780-08"
#>
#> [[2]]$name
#> [1] "Haemopis terrestris"
#>
#> [[2]]$gene
#> [1] "GBAN1780-08"
#>
#> [[2]]$sequence
#> [1] "---------------------------------ATTGGAACWTTWTATTTTATTTTNGGNGCTTGATCTGCTATATTNGGGATCTCAATA---AGGAATATTATTCGAATTGAGCCATCTCAACCTGGGAGATTATTAGGAAAT---GATCAATTATATAATTCATTAGTAACAGCTCATGGATTAATTATAATTTTCTTTATGGTTATGCCTATTTTGATTGGTGGGTTTGGTAATTGATTACTACCTTTAATA---ATTGGAGCCCCTGATATAGCTTTTCCTCGATTAAATAATTTAAGTTTTTGATTATTACCACCTTCATTAATTATATTGTTAAGATCCTCTATTATTGAAAGAGGGGTAGGTACAGGTTGAACCTTATATCCTCCTTTAGCAGATAGATTATTTCATTCAGGTCCATCGGTAGATATA---GCTATTTTTTCATTACATATAGCTGGAGCATCATCTATTTTAGGCTCATTAAACTTTATTTCTACAATTATTAATATACGAATTAAAGGTATAAGATCTGATCGAGTACCTTTATTTGTATGATCAGTTGTTATTACAACAGTTCTGTTATTATTGTCTTTACCTGTTTTAGCTGCA---GCTATTACTATATTATTAACAGATCGTAATTTAAATACTACTTTTTTTGATCCTATAGGAGGTGGAGATCCAGTATTGTTTCAACACTTATTTTGATTTTTTGGTCATCCAGAAGTATATATTTTGATTTTACCAGGATTTGGAGCAATTTCTCATATTATTACAAATAATTCT---AAAAAATTGGAACCTTTTGGATCTCTTGGTATAATTTATGCTATAATTGGAATTGCAGTTTTAGGGTTTATTGTATGAGCCCATCATATATTTACTGTAGGATTAGATGTTGATACTCGAGCTTATTTTACAGCAGCTACTATAGTTATTGCTGTTCCTACTGGTATTAAAGTTTTTAGGTGATTA---GCAACA"
#>
#>
#> [[3]]
#> [[3]]$id
#> [1] "GBNM0293-06"
#>
#> [[3]]$name
#> [1] "Steinernema carpocapsae"
#>
#> [[3]]$gene
#> [1] "GBNM0293-06"
#>
#> [[3]]$sequence
#> [1] "---------------------------------------------------------------------------------ACAAGATTATCTCTTATTATTCGTTTAGAGTTGGCTCAACCTGGTCTTCTTTTGGGTAAT---GGTCAATTATATAATTCTATTATTACTGCTCATGCTATTCTTATAATTTTTTTCATAGTTATACCTAGAATAATTGGTGGTTTTGGTAATTGAATATTACCTTTAATATTGGGGGCTCCTGATATAAGTTTTCCACGTTTGAATAATTTAAGTTTTTGATTGCTACCAACTGCTATATTTTTGATTTTAGATTCTTGTTTTGTTGACACTGGTTGTGGTACTAGTTGAACTGTTTATCCTCCTTTGAGG---ACTTTAGGTCACCCTGGYAGAAGTGTAGATTTAGCTATTTTTAGTCTTCATTGTGCAGGAATTAGCTCAATTTTAGGGGCTATTAATTTTATATGTACTACAAAAAATCTTCGTAGTAGTTCTATTTCTTTGGAACATATAAGACTTTTTGTTTGGGCTGTTTTTGTTACTGTTTTTTTATTAGTTTTATCTTTACCTGTTTTAGCTGGTGCTATTACTATGCTTTTAACAGACCGTAATTTAAATACTTCTTTTTTT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
#>
#>
#> [[4]]
#> [[4]]$id
#> [1] "NEONV108-11"
#>
#> [[4]]$name
#> [1] "Aedes thelcter"
#>
#> [[4]]$gene
#> [1] "NEONV108-11"
#>
#> [[4]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGATCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGATCCAATTGGAGGAGGAGACCCAATTTTATACCAACATTTATTT"
#>
#>
#> [[5]]
#> [[5]]$id
#> [1] "NEONV109-11"
#>
#> [[5]]$name
#> [1] "Aedes thelcter"
#>
#> [[5]]$gene
#> [1] "NEONV109-11"
#>
#> [[5]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGGTCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGACCCAATTGGAGGGGGAGACCCAATTTTATACCAACATTTATTT"
```
And you can search by researcher name
```r
bold_seq(researchers = 'Thibaud Decaens')[[1]]
#> $id
#> [1] "BGABA657-14"
#>
#> $name
#> [1] "Coleoptera"
#>
#> $gene
#> [1] "BGABA657-14"
#>
#> $sequence
#> [1] "ACACTCTATTTCATTTTCGGAGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATACTAATTCGATCTGAATTGGGAAACCCCGGCTCATTGATTGGGGATGATCAAATTTATAATGTTATTGTAACAGCCCATGCATTCATTATAATTTTTTTTATAGTAATACCGATCATAATAGGAGGTTTTGGAAATTGATTAGTCCCGCTAATATTAGGTGCCCCAGATATAGCATTTCCTCGAATAAATAATATAAGATTTTGACTTCTTCCGCCTTCATTAACTTTACTTATTATAAGAAGAATTGTAGAAAACGGGGCGGGAACAGGATGAACAGTTTACCCACCCCTCTCTTCTAACATTGCTCATAGAGGAGCCTCTGTAGATCTTGCAATTTTTAGATTACATTTAGCCGGTGTATCATCAATTTTAGGTGCAGTTAATTTTATTACAACTATTATTAATATACGACCTAAAGGAATAACATTTGATCGCATACCTTTATTTGTATGAGCTGTAGCTTTAACTGCATTACTTTTATTATTATCTTTACCAGTATTAGCAGGTGCAATTACAATACTTTTAACTGATCGA---------------------------------------"
```
by record IDs (for example, process IDs)
```r
bold_seq(ids = c('ACRJP618-11', 'ACRJP619-11'))
#> [[1]]
#> [[1]]$id
#> [1] "ACRJP618-11"
#>
#> [[1]]$name
#> [1] "Lepidoptera"
#>
#> [[1]]$gene
#> [1] "ACRJP618-11"
#>
#> [[1]]$sequence
#> [1] "------------------------TTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "ACRJP619-11"
#>
#> [[2]]$name
#> [1] "Lepidoptera"
#>
#> [[2]]$gene
#> [1] "ACRJP619-11"
#>
#> [[2]]$sequence
#> [1] "AACTTTATATTTTATTTTTGGTATTTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
```
by container (containers include project codes and dataset codes)
```r
bold_seq(container = 'ACRJP')[[1]]
#> $id
#> [1] "ACRJP003-09"
#>
#> $name
#> [1] "Lepidoptera"
#>
#> $gene
#> [1] "ACRJP003-09"
#>
#> $sequence
#> [1] "AACATTATATTTTATTTTTGGGATCTGATCTGGAATAGTAGGGACATCTTTAAGTATACTAATTCGAATAGAACTAGGAAATCCTGGATGTTTAATTGGGGATGATCAAATTTATAATACTATTGTTACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCCATTATAATTGGAGGTTTTGGCAATTGACTTGTACCATTAATATTAGGAGCCCCTGATATAGCATTTCCCCGAATAAATAATATAAGATTTTGACTTCTTCCCCCCTCATTAATTTTATTAATTTCAAGAAGAATTGTTGAAAATGGAGCAGGAACAGGATGAACAGTCTATCCTCCATTATCTTCTAATATTGCGCATAGAGGATCCTCTGTTGATTTAGCTATTTTCTCACTTCATTTAGCAGGAATTTCTTCTATTTTAGGAGCAATTAATTTTATTACAACTATTATTAATATACGAATAAATAATTTACTTTTTGACCAAATACCTCTATTTGTTTGAGCAGTAGGTATTACAGCTGTTCTTCTTTTATTATCATTACCAGTATTAGCAGGAGCAATTACCATACTATTAACAGATCGTAATTTAAATACTTCTTTCTTTGATCCTGCTGGAGGAGGAGATCCAATTTTATACCAACATTTATTT"
```
by bin (a bin is a _Barcode Index Number_)
```r
bold_seq(bin = 'BOLD:AAA5125')[[1]]
#> $id
#> [1] "BLPAB406-06"
#>
#> $name
#> [1] "Eacles ormondei"
#>
#> $gene
#> [1] "BLPAB406-06"
#>
#> $sequence
#> [1] "AACTTTATATTTTATTTTTGGAATTTGAGCAGGTATAGTAGGAACTTCTTTAAGATTACTAATTCGAGCAGAATTAGGTACCCCCGGATCTTTAATTGGAGATGACCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGATTAGTACCCCTAATACTAGGAGCTCCTGATATAGCTTTCCCCCGAATAAATAATATAAGATTTTGACTATTACCCCCATCTTTAACTCTTTTAATTTCTAGAAGAATTGTCGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCCCTTTCATCTAATATTGCTCATGGAGGCTCTTCTGTTGATTTAGCTATTTTTTCCCTTCATCTAGCTGGAATCTCATCAATTTTAGGAGCTATTAATTTTATCACAACAATCATTAATATACGACTAAATAATATAATATTTGACCAAATACCTTTATTTGTATGAGCTGTTGGTATTACAGCATTTCTTTTATTGTTATCTTTACCTGTACTAGCTGGAGCTATTACTATACTTTTAACAGATCGAAACTTAAATACATCATTTTTTGACCCAGCAGGAGGAGGAGATCCTATTCTCTATCAACATTTATTT"
```
And there are more ways to query, check out the docs for `?bold_seq`.
### Search for specimen data only
The BOLD specimen API doesn't give back sequences, only specimen data. By default you download `tsv` format data, which is given back to you as a `data.frame`
```r
res <- bold_specimens(taxon = 'Osmia')
head(res[,1:8])
#> processid sampleid recordID catalognum fieldnum
#> 1 ASGCB261-13 BIOUG07489-F10 3955538 BIOUG07489-F10
#> 2 BCHYM1499-13 BC ZSM HYM 19359 4005348 BC ZSM HYM 19359 BC ZSM HYM 19359
#> 3 BCHYM412-13 BC ZSM HYM 18272 3896353 BC ZSM HYM 18272 BC ZSM HYM 18272
#> 4 BCHYM413-13 BC ZSM HYM 18273 3896354 BC ZSM HYM 18273 BC ZSM HYM 18273
#> 5 FBAPB706-09 BC ZSM HYM 02181 1289067 BC ZSM HYM 02181 BC ZSM HYM 02181
#> 6 FBAPB730-09 BC ZSM HYM 02205 1289091 BC ZSM HYM 02205 BC ZSM HYM 02205
#> institution_storing bin_uri phylum_taxID
#> 1 Biodiversity Institute of Ontario BOLD:AAB8874 20
#> 2 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAD6282 20
#> 3 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20
#> 4 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20
#> 5 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAE4126 20
#> 6 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAK5820 20
```
You can optionally get back the data in `XML` format
```r
bold_specimens(taxon = 'Osmia', format = 'xml')
```
```r
1470124
BOM1525-10
BOLD:AAN3337
DHB 1011
DHB 1011
DHB1011
Marjorie Barrick Museum
```
You can choose to get the `crul` response object (an `HttpResponse`) back if you'd rather work with the raw data returned from the BOLD API.
```r
res <- bold_specimens(taxon = 'Osmia', format = 'xml', response = TRUE)
res$url
#> [1] "http://v4.boldsystems.org/index.php/API_Public/specimen?taxon=Osmia&format=xml"
res$status_code
#> [1] 200
res$headers
#> NULL
```
### Search for specimen plus sequence data
The specimen/sequence combined API gives back specimen and sequence data. Like the specimen API, this one gives by default `tsv` format data, which is given back to you as a `data.frame`. Here, we're setting `sepfasta=TRUE` so that the sequence data is given back as a list, and taken out of the `data.frame` returned so the `data.frame` is more manageable.
```r
res <- bold_seqspec(taxon = 'Osmia', sepfasta = TRUE)
res$fasta[1:2]
#> $`ASGCB261-13`
#> [1] "AATTTTATATATAATTTTTGCTATATGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCTGGTTCATGAATTTCAAATGATCAAACTTATAATTCTTTAGTTACTGCTCATGCTTTTTTAATAATTTTTTTCTTAGTTATACCATTCTTAATTGGGGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATACTTTTATTATTAAGAAATTTTATAAATCCTAGTCCAGGAACTGGATGAACTGTTTATCCACCTTTATCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAACATATTCAATTACCTTTATTTCCTTGATCTGTCTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCAGGTGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCTACAGGAGGAGGAGATCCTATTCTTTATCAACATTTATTT"
#>
#> $`BCHYM1499-13`
#> [1] "AATTCTTTACATAATTTTTGCTTTATGATCTGGAATAATTGGGTCAGCAATAAGAATTATTATTCGAATAGAATTAAGTATCCCAGGTTCATGAATTACTAATGATCAAATTTATAATTCTTTAGTAACTGCACATGCTTTTTTAATAATTTTTTTTCTTGTGATACCATTTTTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCTTTCCCACGAATAAACAATATTAGATTTTGATTATTACCGCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCCCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTGCCTTTATTTCCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTGTTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCTACAGGAGGAGGAGATCCAATTCTTTATCAACATTTATTT"
```
Or you can index to a specific sequence like
```r
res$fasta['GBAH0293-06']
#> $`GBAH0293-06`
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
```
### Get trace files
This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information.
```r
bold_trace(taxon = 'Osmia', quiet = TRUE)
```
bold/tests/ 0000755 0001762 0000144 00000000000 13134420302 012324 5 ustar ligges users bold/tests/testthat/ 0000755 0001762 0000144 00000000000 13134420302 014164 5 ustar ligges users bold/tests/testthat/test-bold_tax_id.R 0000644 0001762 0000144 00000003647 12773532625 017572 0 ustar ligges users context("bold_tax_id")
# Single-id lookups: check the container class and the classes of a few columns.
test_that("bold_tax_id returns the correct classes", {
skip_on_cran()
aa <- bold_tax_id(88899)
bb <- bold_tax_id(125295)
expect_is(aa, "data.frame")
expect_is(bb, "data.frame")
expect_is(aa$input, "numeric")
expect_is(aa$taxid, "integer")
expect_is(aa$tax_rank, "character")
})
# A vector of ids is expected to yield one row per id.
test_that("bold_tax_id works with multiple ids passed in", {
skip_on_cran()
aa <- bold_tax_id(c(88899,125295))
expect_is(aa, "data.frame")
expect_equal(NROW(aa), 2)
})
# Each dataTypes value is requested for the same id; results are compared by
# row count (always one row) and by relative column counts.
test_that("bold_tax_id dataTypes param works as expected", {
skip_on_cran()
aa <- bold_tax_id(88899, dataTypes = "basic")
bb <- bold_tax_id(88899, dataTypes = "stats")
dd <- bold_tax_id(88899, dataTypes = "geo")
ee <- bold_tax_id(88899, dataTypes = "sequencinglabs")
ff <- bold_tax_id(321215, dataTypes = "stats") # no public marker sequences
gg <- bold_tax_id(321215, dataTypes = "basic,stats") # no public marker sequences
expect_is(aa, "data.frame")
expect_is(bb, "data.frame")
expect_is(dd, "data.frame")
expect_is(ee, "data.frame")
expect_is(ff, "data.frame")
expect_is(gg, "data.frame")
expect_equal(NROW(aa), 1)
expect_equal(NROW(bb), 1)
expect_equal(NROW(dd), 1)
expect_equal(NROW(ee), 1)
expect_equal(NROW(ff), 1)
expect_equal(NROW(gg), 1)
# NOTE(review): this pins the exact country columns returned by the live
# service, so it will fail if the remote data changes.
expect_named(dd, c('input','Brazil','Mexico','Panama','Guatemala','Peru','Bolivia','Ecuador'))
expect_gt(NCOL(bb), NCOL(aa))
expect_gt(NCOL(ee), NCOL(aa))
expect_gt(NCOL(bb), NCOL(ee))
expect_gt(NCOL(ff), NCOL(aa))
expect_gt(NCOL(gg), NCOL(ff))
})
# includeTree=TRUE is expected to return more rows than includeTree=FALSE.
test_that("includeTree param works as expected", {
skip_on_cran()
aa <- bold_tax_id(id=88899, includeTree=FALSE)
bb <- bold_tax_id(id=88899, includeTree=TRUE)
expect_is(aa, "data.frame")
expect_is(bb, "data.frame")
expect_gt(NROW(bb), NROW(aa))
})
# Calling without the required `id` argument must raise R's missing-argument error.
test_that("bold_tax_id fails well", {
skip_on_cran()
expect_error(bold_tax_id(), "argument \"id\" is missing, with no default")
})
bold/tests/testthat/test-bold_tax_name.R 0000644 0001762 0000144 00000001455 12676331243 020104 0 ustar ligges users context("bold_tax_name")
# One name, two names, and a mix of genus and binomial names: every call
# should give back a data.frame; column classes are checked on the first result.
test_that("bold_tax_name returns the correct classes", {
skip_on_cran()
a <- bold_tax_name(name='Diplura')
b <- bold_tax_name(name=c('Diplura','Osmia'))
cc <- bold_tax_name(name=c("Apis","Puma concolor","Pinus concolor"))
expect_is(a, "data.frame")
expect_is(b, "data.frame")
expect_is(cc, "data.frame")
expect_is(a$input, "character")
expect_is(a$taxid, "integer")
})
# Calling without the required `name` argument must raise R's missing-argument error.
test_that("bold_tax_name fails well", {
skip_on_cran()
expect_error(bold_tax_name(), "argument \"name\" is missing, with no default")
})
# A truncated name queried with fuzzy=TRUE is expected to return more rows
# than the same query with fuzzy=FALSE.
test_that("fuzzy works", {
skip_on_cran()
aa <- bold_tax_name(name='Diplur', fuzzy=TRUE)
aa_not <- bold_tax_name(name='Diplur', fuzzy=FALSE)
expect_is(aa, "data.frame")
expect_is(aa$input, "character")
expect_gt(NROW(aa), NROW(aa_not))
})
bold/tests/testthat/test-bold_specimens.R 0000644 0001762 0000144 00000001737 13134177202 020272 0 ustar ligges users # tests for bold_specimens fxn in bold
context("bold_specimens")

# A parsed (tsv) result and a raw response object are fetched for the same
# taxon; classes and a few response fields are checked.
test_that("bold_specimens returns the correct dimensions or values", {
  skip_on_cran()

  a <- bold_specimens(taxon='Osmia')
  b <- bold_specimens(taxon='Osmia', format='xml', response=TRUE)

  expect_equal(b$status_code, 200)
  expect_equal(b$response_headers$`content-type`, "application/x-download")
  expect_is(a, "data.frame")
  expect_is(b, "HttpResponse")
  expect_is(a$recordID, "integer")
  expect_is(a$processid, "character")
  expect_is(b$response_headers, "list")
})

# A very small timeout passed through `...` must surface as an error
# (the assertion is expect_error, so the description says "error").
test_that("Throws error on call that takes forever including timeout in callopts", {
  skip_on_cran()

  expect_error(bold_specimens(geo='Costa Rica', timeout_ms = 2), "Timeout was reached")
})

# Empty or missing search parameters must be rejected with a helpful message.
test_that("bold_specimens returns correct error when parameters empty or not given", {
  skip_on_cran()

  expect_error(bold_specimens(taxon=''), "must provide a non-empty value")
  expect_error(bold_specimens(), "must provide a non-empty value")
})
bold/tests/testthat/test-bold_seq.R 0000644 0001762 0000144 00000001506 13121303572 017063 0 ustar ligges users # tests for bold_seq fxn in bold
context("bold_seq")
# Queries by taxon and by bin should return a list of records (each itself a
# list); response=TRUE should return the raw response object instead.
test_that("bold_seq returns the correct dimensions/classes", {
skip_on_cran()
a <- bold_seq(taxon='Coelioxys')
b <- bold_seq(bin='BOLD:AAA5125')
# NOTE(review): the local name `c` shadows base::c inside this test body.
c <- bold_seq(taxon='Coelioxys', response=TRUE)
expect_equal(c$status_code, 200)
expect_equal(c$response_headers$`content-type`, "application/x-download")
expect_is(a, "list")
expect_is(b, "list")
expect_is(a[[1]], "list")
expect_is(a[[1]]$id, "character")
expect_is(a[[1]]$sequence, "character")
expect_is(c, "HttpResponse")
expect_is(c$response_headers, "list")
})
# Empty or missing search parameters must be rejected with a helpful message.
test_that("bold_seq returns correct error when parameters empty or not given", {
skip_on_cran()
expect_error(bold_seq(taxon = ''), "must provide a non-empty value")
expect_error(bold_seq(), "must provide a non-empty value")
})
bold/tests/testthat/test-bold_seqspec.R 0000644 0001762 0000144 00000001656 13121303643 017743 0 ustar ligges users # tests for bold_seqspec fxn in bold
context("bold_seqspec")

# The same taxon is fetched three ways: parsed data.frame, raw response, and
# with sequences split out (sepfasta=TRUE) into a list of data + fasta.
test_that("bold_seqspec returns the correct dimensions or values", {
  skip_on_cran()

  a <- bold_seqspec(taxon='Osmia')
  b <- bold_seqspec(taxon='Osmia', response=TRUE)
  # named `cc` rather than `c` so base::c is not shadowed inside the test
  cc <- bold_seqspec(taxon='Osmia', sepfasta=TRUE)

  expect_equal(b$status_code, 200)
  expect_equal(b$response_headers$`content-type`, "application/x-download")
  expect_is(a, "data.frame")
  expect_is(b, "HttpResponse")
  expect_is(cc, "list")
  expect_is(cc$data, "data.frame")
  expect_is(cc$fasta, "list")
  expect_is(cc$fasta[[1]], "character")
  expect_is(a$recordID, "integer")
  expect_is(a$directions, "character")
  expect_is(b$response_headers, "list")
})

# Empty or missing search parameters must be rejected with a helpful message.
test_that("bold_seqspec returns correct error when parameters empty or not given", {
  skip_on_cran()

  expect_error(bold_seqspec(taxon=''), "must provide a non-empty value")
  expect_error(bold_seqspec(), "must provide a non-empty value")
})
bold/tests/testthat/test-bold_identify.R 0000644 0001762 0000144 00000001510 13121307503 020077 0 ustar ligges users context("bold_identify")
# Shared input for all tests below: the first element of the `sequences`
# object (expected to be available when the tests run).
seq <- sequences$seq1
# Default call: a list whose first element is a data.frame with a character ID column.
test_that("bold_identify works as expected", {
skip_on_cran()
aa <- bold_identify(seq)
expect_is(aa, 'list')
expect_is(aa[[1]], 'data.frame')
expect_is(aa[[1]]$ID, 'character')
})
# Same shape is expected when a non-default database is selected.
test_that("bold_identify db param works as expected", {
skip_on_cran()
aa <- bold_identify(seq, db = 'COX1_SPECIES')
expect_is(aa, 'list')
expect_is(aa[[1]], 'data.frame')
expect_is(aa[[1]]$ID, 'character')
})
# response=TRUE: each list element is the raw response object instead of parsed data.
test_that("bold_identify response param works as expected", {
skip_on_cran()
aa <- bold_identify(seq, response = TRUE)
expect_is(aa, "list")
expect_is(aa[[1]], "HttpResponse")
expect_equal(aa[[1]]$status_code, 200)
})
# Calling without the required `sequences` argument must raise R's missing-argument error.
test_that("bold_identify fails well", {
skip_on_cran()
expect_error(bold_identify(), "argument \"sequences\" is missing, with no default")
})
bold/tests/test-all.R 0000644 0001762 0000144 00000000045 12341431353 014202 0 ustar ligges users library(testthat)
# Run the testthat suite for the 'bold' package.
test_check('bold')
bold/NAMESPACE 0000644 0001762 0000144 00000001474 13134207626 012423 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(bold_identify_parents,data.frame)
S3method(bold_identify_parents,default)
S3method(bold_identify_parents,list)
S3method(print,boldtrace)
export(bold_filter)
export(bold_identify)
export(bold_identify_parents)
export(bold_seq)
export(bold_seqspec)
export(bold_specimens)
export(bold_stats)
export(bold_tax_id)
export(bold_tax_name)
export(bold_trace)
export(read_trace)
importFrom(crul,HttpClient)
importFrom(crul,url_build)
importFrom(jsonlite,fromJSON)
importFrom(plyr,rbind.fill)
importFrom(reshape,sort_df)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_split)
importFrom(xml2,as_list)
importFrom(xml2,read_xml)
importFrom(xml2,xml_find_all)
importFrom(xml2,xml_find_first)
importFrom(xml2,xml_name)
importFrom(xml2,xml_text)
bold/NEWS.md 0000644 0001762 0000144 00000010711 13134225567 012300 0 ustar ligges users bold 0.5.0
==========
### NEW FEATURES
* Now using BOLD's v4 API throughout the package. This was essentially
just a change of the BASE URL for each request (#30)
* Now using `crul` for HTTP requests. Only really affects users in that
specifying curl options works slightly differently (#42)
### BUG FIXES
* `marker` parameter in `bold_seqspec` was and maybe still is not working,
in the sense that using the parameter doesn't always limit results to the
marker you specify. Not really fixed - watch out for it, and filter after you
get results back to get markers you want. (#25)
* Fixed bug in `bold_identify_parents` - was failing when no match for a
parent name. (#41) thx @VascoElbrecht
* `tsv` results were erroring in `bold_specimens` and other fxns (#46) - fixed
by switching to new BOLD v4 API (#30)
### MINOR IMPROVEMENTS
* Namespace calls to base pkgs for `stats` and `utils` - replaced
`is` with `inherits` (#39)
bold 0.4.0
==========
### NEW FEATURES
* New function `bold_identify_parents()` to add taxonomic information
to the output of `bold_identify()`. We take the taxon names from `bold_identify`
output, and use `bold_tax_name` to get the taxonomic ID, passing it to
`bold_tax_id` to get the parent names, then attaches those to the input data.
There are two options given what you put for the `wide` parameter. If `TRUE`
you get data.frames of the same dimensions with parent rank name and ID
as new columns (for each name going up the hierarchy) - while if `FALSE`
you get a long data.frame. thanks @dougwyu for inspiring this (#36)
### MINOR IMPROVEMENTS
* replace `xml2::xml_find_one` with `xml2::xml_find_first` (#33)
* Fix description of `db` options in `bold_identify` man file -
COX1 and COX1_SPECIES were switched (#37) thanks for pointing that out
@dougwyu
### BUG FIXES
* Fix to `bold_tax_id` for when some elements returned from the BOLD
API were empty/`NULL` (#32) thanks @fmichonneau !!
bold 0.3.5
==========
### MINOR IMPROVEMENTS
* Added more tests to the test suite (#28)
### BUG FIXES
* Fixed a bug in an internal data parser (#27)
bold 0.3.4
==========
### NEW FEATURES
* Added a code of conduct
### MINOR IMPROVEMENTS
* Switched to `xml2` from `XML` as the XML parser for this package (#26)
* Fixes to `bold_trace()` to create dir and tar file when it doesn't
already exist
### BUG FIXES
* Fixed odd problem where sometimes resulting data from HTTP request
was garbled on `content(x, "text")`, so now using `rawToChar(content(x))`,
which works (#24)
bold 0.3.0
==========
### MINOR IMPROVEMENTS
* Explicitly import non-base R functions (#22)
* Better package level manual file
bold 0.2.6
==========
### MINOR IMPROVEMENTS
* `sangerseqR` package now in Suggests for reading trace files, and is only used in `bold_trace()`
function.
* General code tidying, reduction of code duplication.
* `bold_trace()` gains two new parameters: `overwrite` to choose whether to overwrite an existing
file of the same name or not, `progress` to show a progress bar for downloading or not.
* `bold_trace()` gains a print method to show a tidy summary of the trace file downloaded.
### BUG FIXES
* Fixed similar bugs in `bold_tax_name()` (#17) and `bold_tax_id()` (#18) in which species that were missing from the BOLD database returned empty arrays but 200 status codes. Parsing those as failed attempts now. Also fixes problem in taxize in `bold_search()` that use these two functions.
bold 0.2.0
==========
### NEW FEATURES
* Package gains two new functions for working with the BOLD taxonomy APIs: `bold_tax_name()` and `bold_tax_id()`, which search for taxonomic data from BOLD using either names or BOLD identifiers, respectively. (#11)
* Two new packages in Imports: `jsonlite` and `reshape`.
### MINOR IMPROVEMENTS
* Added new taxonomy API functions to the vignette (#14)
* Added reference URLS to all function doc files to allow easy reference for the appropriate API docs.
* `callopts` parameter changed to `...` throughout the package, so that passing on options to `httr::GET` is done via named parameters, e.g., `config=verbose()`. (#13)
* Added examples of doing curl debugging throughout man pages.
bold 0.1.2
==========
### MINOR IMPROVEMENTS
* Improved the vignette (#8)
* Added small function to print helpful message when user inputs no parameters or zero length parameter values.
### BUG FIXES
* Fixed some broken tests with the new `httr` (v0.4) (#9), and added a few more tests (#7)
bold 0.1.0
==========
### NEW FEATURES
* released to CRAN
bold/data/ 0000755 0001762 0000144 00000000000 12370735031 012104 5 ustar ligges users bold/data/sequences.RData 0000644 0001762 0000144 00000001225 12370735031 015014 0 ustar ligges users UNA}pR$#aQ !%|Ue/Ѝۏ}uz;-˲_ݲ??zz~y;?=Ώ@~Uؽ{e}6^+w|
)(2>jS։=fIҜУD&'eqG BP8Õ.` 0ތ2elQA%~"N=T8FLnpD<7Z"P!%ſI90bdJ3I䂢I 9gJW L2@@g4%o?Ntm!Nkr>|ZcŐJKRf0=4xIxIi|Y+#/kO*CŇgJYE$3ya8/G;p,0j5Z7=͛Q$Kv
ű 4h >]**4Y݆Yيbh2Z $DAQ+憬6CA$|?QGLEObJ)rgre{gvM@e{Ԅul:G@ql
%;W]H
MP
ǔ
Y&tT<8}3%Z;y?{$=?l~C(7.W bold/R/ 0000755 0001762 0000144 00000000000 13134206376 011400 5 ustar ligges users bold/R/bold-package.R 0000644 0001762 0000144 00000003375 13134212646 014041 0 ustar ligges users #' bold: A programmatic interface to the Barcode of Life data.
#'
#' @section About:
#'
#' This package gives you access to data from BOLD System
#' \url{http://www.boldsystems.org/} via their API
#' (\url{http://v4.boldsystems.org/index.php/api_home})
#'
#' @section Functions:
#'
#' \itemize{
#' \item \code{\link{bold_specimens}} - Search for specimen data.
#' \item \code{\link{bold_seq}} - Search for and retrieve sequences.
#' \item \code{\link{bold_seqspec}} - Get sequence and specimen data together.
#' \item \code{\link{bold_trace}} - Get trace files - saves to disk.
#' \item \code{\link{read_trace}} - Read trace files into R.
#' \item \code{\link{bold_tax_name}} - Get taxonomic names via input names.
#' \item \code{\link{bold_tax_id}} - Get taxonomic names via BOLD identifiers.
#' \item \code{\link{bold_identify}} - Search for match given a COI sequence.
#' }
#'
#' Interestingly, they provide xml and tsv format data for the specimen data,
#' while they provide fasta data format for the sequence data. So for the
#' specimen data you can get back raw XML, or a data frame parsed from the
#' tsv data, while for sequence data you get back a list (b/c sequences are
#' quite long and would make a data frame unwieldy).
#'
#' @importFrom crul HttpClient url_build
#' @importFrom xml2 read_xml xml_find_all xml_find_first xml_text
#' xml_name as_list
#' @importFrom jsonlite fromJSON
#' @importFrom reshape sort_df
#' @importFrom plyr rbind.fill
#' @docType package
#' @name bold-package
#' @aliases bold
NULL
#' List of 3 nucleotide sequences to use in examples for the
#' \code{\link{bold_identify}} function
#'
#' @details Each sequence is a character string, of lengths 410, 600, and 696.
#' @name sequences
#' @docType data
#' @keywords data
NULL
bold/R/bold_specimens.R 0000644 0001762 0000144 00000004255 13134217620 014511 0 ustar ligges users #' Search BOLD for specimens.
#'
#' @export
#' @template args
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param format (character) One of xml, json, tsv (default). tsv format gives
#' back a data.frame object. xml gives back parsed XML as \code{xml_document}
#' object. 'json' (JavaScript Object Notation) and 'dwc' (Darwin Core Archive)
#' are supported in theory, but the JSON can be malformed, so we don't support
#' that here, and the DWC option actually returns TSV.
#'
#' @examples \dontrun{
#' bold_specimens(taxon='Osmia')
#' bold_specimens(taxon='Osmia', format='xml')
#' bold_specimens(taxon='Osmia', response=TRUE)
#' res <- bold_specimens(taxon='Osmia', format='xml', response=TRUE)
#' res$url
#' res$status_code
#' res$response_headers
#'
#' # More than 1 can be given for all search parameters
#' bold_specimens(taxon=c('Coelioxys','Osmia'))
#'
#' ## curl debugging
#' ### These examples below take a long time, so you can set a timeout so that
#' ### it stops by X sec
#' head(bold_specimens(taxon='Osmia', verbose = TRUE))
#' # head(bold_specimens(geo='Costa Rica', timeout_ms = 6))
#' }
bold_specimens <- function(taxon = NULL, ids = NULL, bin = NULL,
  container = NULL, institutions = NULL, researchers = NULL, geo = NULL,
  response=FALSE, format = 'tsv', ...) {

  # Only the two formats parsed below are accepted.
  format <- match.arg(format, choices = c('xml', 'tsv'))

  # Assemble the query; each search parameter goes through pipeornull() and
  # the resulting list through bc() before being sent.
  args <- bc(list(
    taxon = pipeornull(taxon),
    geo = pipeornull(geo),
    ids = pipeornull(ids),
    bin = pipeornull(bin),
    container = pipeornull(container),
    institutions = pipeornull(institutions),
    researchers = pipeornull(researchers),
    format = format
  ))

  # The search parameters that are validated before any request is made.
  search_fields <- c('taxon', 'ids', 'bin', 'container',
                     'institutions', 'researchers', 'geo')
  check_args_given_nonempty(args, search_fields)

  out <- b_GET(paste0(bbase(), 'API_Public/specimen'), args, ...)

  # Caller asked for the raw response object: hand it back untouched.
  if (response) {
    return(out)
  }

  # Otherwise parse the body according to the requested format.
  body_txt <- out$parse("UTF-8")
  if (format == 'xml') {
    xml2::read_xml(body_txt)
  } else {
    utils::read.delim(text = body_txt, header = TRUE, sep = "\t",
                      stringsAsFactors = FALSE)
  }
}
bold/R/bold_identify.R 0000644 0001762 0000144 00000007520 13134220464 014334 0 ustar ligges users #' Search for matches to sequences against the BOLD COI database.
#'
#' @export
#'
#' @param sequences (character) Returns all records containing matching marker
#' codes. Required.
#' @param db (character) The database to match against, one of COX1,
#' COX1_SPECIES, COX1_SPECIES_PUBLIC, or COX1_L640bp. See Details for
#' more information.
#' @param response (logical) Note that response is the object that returns
#' from the Curl call, useful for debugging, and getting detailed info on
#' the API call.
#' @param ... Further args passed on to \code{\link[crul]{HttpClient}}, main
#' purpose being curl debugging
#'
#' @section db parameter options:
#' \itemize{
#' \item COX1 Every COI barcode record on BOLD with a minimum sequence
#' length of 500bp (warning: unvalidated library and includes records without
#' species level identification). This includes many species represented by
#' only one or two specimens as well as all species with interim taxonomy. This
#' search only returns a list of the nearest matches and does not provide a
#' probability of placement to a taxon.
#' \item COX1_SPECIES Every COI barcode record with a species level
#' identification and a minimum sequence length of 500bp. This includes
#' many species represented by only one or two specimens as well as all
#' species with interim taxonomy.
#' \item COX1_SPECIES_PUBLIC All published COI records from BOLD and GenBank
#' with a minimum sequence length of 500bp. This library is a collection of
#' records from the published projects section of BOLD.
#' \item COX1_L640bp Subset of the Species library with a minimum sequence
#' length of 640bp and containing both public and private records. This library
#' is intended for short sequence identification as it provides maximum overlap
#' with short reads from the barcode region of COI.
#' }
#'
#' @section Named outputs:
#' To maintain names on the output list of data make sure to pass in a
#' named list to the \code{sequences} parameter. You can for example,
#' take a list of sequences, and use \code{\link{setNames}} to set names.
#'
#' @return A list with one element per input sequence, each a data.frame with
#' details for each specimen matched. If a request failed, returns \code{NULL}
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=idengine}
#' @seealso \code{\link{bold_identify_parents}}
#' @examples \dontrun{
#' seq <- sequences$seq1
#' res <- bold_identify(sequences=seq)
#' head(res[[1]])
#' head(bold_identify(sequences=seq, db='COX1_SPECIES')[[1]])
#' }
bold_identify <- function(sequences, db = 'COX1', response=FALSE, ...) {
  # Child elements read from every <match> node; defined once because it is
  # the same for every sequence and every match.
  toget <- c("ID","sequencedescription","database",
             "citation","taxonomicidentification","similarity")

  # Query the ID engine for a single sequence `a` against database `b`.
  # Returns the raw response when `response` is TRUE, otherwise the matches
  # combined into one data.frame via rbind.fill.
  foo <- function(a, b){
    args <- bc(list(sequence = a, db = b))
    cli <- crul::HttpClient$new(url = paste0(bbase(), 'Ids_xml'))
    out <- cli$get(query = args, ...)
    out$raise_for_status()
    # NOTE(review): `out$headers` does not look like a field this response
    # object populates (elsewhere in the package headers are read from
    # `response_headers`), so this check likely never fires. Confirm which
    # content type the service actually returns before tightening it.
    stopifnot(out$headers$`content-type` == 'text/xml')
    if (response) {
      out
    } else {
      tt <- out$parse('UTF-8')
      xml <- xml2::read_xml(tt)
      nodes <- xml2::xml_find_all(xml, "//match")
      outlist <- lapply(nodes, function(x){
        # text of each requested child element, as a character vector
        tmp2 <- vapply(toget, function(y) {
          tmp <- xml2::xml_find_first(x, y)
          stats::setNames(xml2::xml_text(tmp), xml2::xml_name(tmp))
        }, "")
        # flatten the nested <specimen> element and give its pieces fixed names
        spectmp <- xml2::as_list(xml2::xml_find_first(x, "specimen"))
        spectmp <- unnest(spectmp)
        names(spectmp) <- c('specimen_url','specimen_country',
                            'specimen_lat','specimen_lon')
        # empty specimen fields come through as NULL; store them as NA so the
        # data.frame keeps one value per column (vapply keeps the mask logical)
        spectmp[vapply(spectmp, is.null, logical(1))] <- NA
        data.frame(c(tmp2, spectmp), stringsAsFactors = FALSE)
      })
      do.call(rbind.fill, outlist)
    }
  }

  # One request per sequence; names on `sequences` carry over to the result.
  lapply(sequences, foo, b = db)
}
# Recursively flatten a nested list. A list with names is descended into and
# its flattened children are concatenated; a list without names is collapsed
# into a single-element list holding its unlisted, unnamed contents.
unnest <- function(x) {
  has_names <- !is.null(names(x))
  if (has_names) {
    flattened_children <- lapply(x, unnest)
    return(do.call("c", flattened_children))
  }
  list(unname(unlist(x)))
}
bold/R/bold_trace.R 0000644 0001762 0000144 00000006253 13134226522 013622 0 ustar ligges users #' Get BOLD trace files
#'
#' @export
#' @template args
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param marker (character) Returns all records containing matching
#' marker codes.
#' @param dest (character) A directory to write the files to
#' @param overwrite (logical) Overwrite existing directory and file?
#' @param progress (logical) Print progress or not. NOT AVAILABLE FOR NOW.
#' HOPEFULLY WILL RETURN SOON.
#' @param ... Further args passed on to \code{\link[crul]{HttpClient}}
#' @param x Object to print or read.
#'
#' @examples \dontrun{
#' # Use a specific destination directory
#' bold_trace(taxon='Bombus', geo='Alaska', dest="~/mytarfiles")
#'
#' # Another example
#' # bold_trace(ids='ACRJP618-11', dest="~/mytarfiles")
#' # bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles")
#'
#' # read file in
#' x <- bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles")
#' (res <- read_trace(x$ab1[2]))
#'
#' # The progress dialog is pretty verbose, so quiet=TRUE is a nice touch,
#' # but not by default
#' # Beware, this one take a while
#' # x <- bold_trace(taxon='Osmia', quiet=TRUE)
#'
#' if (requireNamespace("sangerseqR", quietly = TRUE)) {
#' library("sangerseqR")
#' primarySeq(res)
#' secondarySeq(res)
#' head(traceMatrix(res))
#' }
#' }
bold_trace <- function(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
  institutions = NULL, researchers = NULL, geo = NULL, marker = NULL, dest=NULL,
  overwrite = TRUE, progress = TRUE, ...) {

  # NOTE(review): `overwrite` and `progress` are accepted but not consulted
  # anywhere in this body.
  if (!requireNamespace("sangerseqR", quietly = TRUE)) {
    stop("Please install sangerseqR", call. = FALSE)
  }

  # Build the request URL from the search parameters.
  args <- bc(list(taxon=pipeornull(taxon), geo=pipeornull(geo),
    ids=pipeornull(ids), bin=pipeornull(bin), container=pipeornull(container),
    institutions=pipeornull(institutions), researchers=pipeornull(researchers),
    marker=pipeornull(marker)))
  url <- crul::url_build(paste0(bbase(), 'API_Public/trace'), query = args)

  # Without `dest`, the tar file and the extraction directory sit side by side
  # in the working directory; with `dest`, the tar file lives inside it.
  if (is.null(dest)) {
    destfile <- file.path(getwd(), "bold_trace_files.tar")
    destdir <- file.path(getwd(), "bold_trace_files")
  } else {
    destdir <- path.expand(dest)
    destfile <- file.path(destdir, "bold_trace_files.tar")
  }
  dir.create(destdir, showWarnings = FALSE, recursive = TRUE)
  if (!file.exists(destfile)) file.create(destfile, showWarnings = FALSE)

  # Download straight to disk, then unpack next to/into the destination.
  cli <- crul::HttpClient$new(url = url)
  cli$get(disk = destfile, ...)
  utils::untar(destfile, exdir = destdir)

  # `pattern` is a regular expression: escape the dot and anchor at the end so
  # only files whose name ends in ".ab1" are reported.
  ab1 <- list.files(destdir, pattern = "\\.ab1$", full.names = TRUE)

  structure(list(destfile = destfile, destdir = destdir, ab1 = ab1,
    args = args), class = "boldtrace")
}
#' @export
# Print method for "boldtrace" objects: lists up to the first ten trace file
# paths in `x$ab1`, followed by "..." when there are more. Returns `x`
# invisibly, as print methods conventionally do.
print.boldtrace <- function(x, ...){
  # NOTE(review): this header currently prints only whitespace; a label may
  # have been intended here - confirm.
  cat("\n", "\n\n")
  # head() copes with an empty vector, whereas 1:min(10, 0) would index
  # c(1, 0) and print a spurious NA.
  ff <- utils::head(x$ab1, 10)
  if (length(ff) < length(x$ab1)) ff <- c(ff, "...")
  cat(ff, sep = "\n")
  invisible(x)
}
#' @export
#' @rdname bold_trace
read_trace <- function(x) {
  # By default `x` itself is handed to the reader.
  trace_path <- x

  # A "boldtrace" object carries its file paths in `$ab1`; it is only
  # accepted when it does not hold more than one path.
  if (inherits(x, "boldtrace")) {
    if (length(x$ab1) > 1) {
      stop("Number of paths > 1, just pass one in", call. = FALSE)
    }
    trace_path <- x$ab1
  }

  sangerseqR::readsangerseq(trace_path)
}
bold/R/bold_filter.R 0000644 0001762 0000144 00000003564 13134226477 014024 0 ustar ligges users #' Get BOLD specimen + sequence data.
#'
#' @export
#' @param x (data.frame) a data.frame, as returned from
#' \code{\link{bold_seqspec}}. Note that some combinations of parameters
#' in \code{\link{bold_seqspec}} don't return a data.frame. Stops with
#' error message if this is not a data.frame. Required.
#' @param by (character) the column by which to group. For example,
#' if you want the longest sequence for each unique species name, then
#' pass \strong{species_name}. If the column doesn't exist, error
#' with message saying so. Required.
#' @param how (character) one of "max" or "min", which get used as
#' \code{which.max} or \code{which.min} to get the longest or shortest
#' sequence, respectively. Note that we remove gap/alignment characters
#' (\code{-})
#' @return a tibble/data.frame
#' @examples \dontrun{
#' res <- bold_seqspec(taxon='Osmia')
#' maxx <- bold_filter(res, by = "species_name")
#' minn <- bold_filter(res, by = "species_name", how = "min")
#'
#' vapply(maxx$nucleotides, nchar, 1, USE.NAMES = FALSE)
#' vapply(minn$nucleotides, nchar, 1, USE.NAMES = FALSE)
#' }
bold_filter <- function(x, by, how = "max") {
  # For each distinct value of column `by`, keep the one row whose
  # `nucleotides` string is longest (how = "max") or shortest (how = "min")
  # after gap characters ("-") are removed. Rows whose `by` value is NA are
  # dropped by split(). Returns a tibble.
  if (!inherits(x, "data.frame")) {
    stop("'x' must be a data.frame", call. = FALSE)
  }
  if (!how %in% c("min", "max")) {
    stop("'how' must be one of 'min' or 'max'", call. = FALSE)
  }
  if (!by %in% names(x)) {
    stop(sprintf("'%s' is not a valid column in 'x'", by), call. = FALSE)
  }
  # `how` was validated above, so look the selector up directly instead of
  # building and evaluating code from a string.
  pick <- switch(how, max = which.max, min = which.min)
  groups <- split(x, x[[by]])
  picked <- lapply(groups, function(z) {
    # ungapped sequence length of every row in this group
    lgts <- vapply(z$nucleotides, function(w) nchar(gsub("-", "", w, fixed = TRUE)),
      1, USE.NAMES = FALSE)
    z[pick(lgts), ]
  })
  # as_tibble() replaces the deprecated tibble::as_data_frame()
  tibble::as_tibble(setrbind(picked))
}
# Stack a list of data.frames by column name, filling columns that are
# missing from some elements, and return the result as a plain data.frame.
setrbind <- function(x) {
  stacked <- data.table::rbindlist(x, use.names = TRUE, fill = TRUE)
  # setDF() returns invisibly; going through a named value keeps the
  # function's result visible.
  as_df <- data.table::setDF(stacked)
  as_df
}
bold/R/bold_tax_id.R 0000644 0001762 0000144 00000004144 13134220021 013755 0 ustar ligges users #' Search BOLD for taxonomy data by BOLD ID.
#'
#' @export
#' @param id (integer) One or more BOLD taxonomic identifiers. required.
#' @param dataTypes (character) Specifies the datatypes that will be
#' returned. 'all' returns all data. 'basic' returns basic taxon information.
#' 'images' returns specimen images. Other values used in the examples
#' below are 'stats', 'geo', 'sequencinglabs' and 'depository'.
#' @param includeTree (logical) If TRUE (default: FALSE), returns a list
#' containing information for parent taxa as well as the specified taxon.
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy}
#' @seealso \code{\link{bold_tax_name}}
#' @examples \dontrun{
#' bold_tax_id(id=88899)
#' bold_tax_id(id=88899, includeTree=TRUE)
#' bold_tax_id(id=88899, includeTree=TRUE, dataTypes = "stats")
#' bold_tax_id(id=c(88899,125295))
#'
#' ## dataTypes parameter
#' bold_tax_id(id=88899, dataTypes = "basic")
#' bold_tax_id(id=88899, dataTypes = "stats")
#' bold_tax_id(id=88899, dataTypes = "images")
#' bold_tax_id(id=88899, dataTypes = "geo")
#' bold_tax_id(id=88899, dataTypes = "sequencinglabs")
#' bold_tax_id(id=88899, dataTypes = "depository")
#' bold_tax_id(id=c(88899,125295), dataTypes = "geo")
#' bold_tax_id(id=c(88899,125295), dataTypes = "images")
#'
#' ## Passing in NA
#' bold_tax_id(id = NA)
#' bold_tax_id(id = c(88899,125295,NA))
#'
#' ## get http response object only
#' bold_tax_id(id=88899, response=TRUE)
#' bold_tax_id(id=c(88899,125295), response=TRUE)
#'
#' ## curl debugging
#' bold_tax_id(id=88899, verbose = TRUE)
#' }
bold_tax_id <- function(id, dataTypes = 'basic', includeTree = FALSE,
  response = FALSE, ...) {
  # One request per element of `id` against the TaxonData endpoint; `...`
  # is forwarded to get_response().
  responses <- lapply(id, function(one_id) {
    query <- bc(list(
      taxId = one_id, dataTypes = dataTypes,
      includeTree = if (includeTree) 'true' else NULL))
    get_response(args = query, url = paste0(bbase(), "API_Tax/TaxonData"), ...)
  })
  # Caller asked for the raw response objects: no parsing.
  if (response) return(responses)

  # Parse each response next to the id that produced it, then stack the
  # per-id data.frames, filling columns missing from some of them.
  parsed <- Map(process_response, x = responses, y = id,
    z = includeTree, w = dataTypes)
  res <- do.call(rbind.fill, parsed)
  # A single column means nothing besides the input came back; flag that
  # with an explicit `noresults` column.
  if (NCOL(res) == 1) res$noresults <- NA
  res
}
bold/R/bold_seqspec.R 0000644 0001762 0000144 00000006536 13134217664 014202 0 ustar ligges users #' Get BOLD specimen + sequence data.
#'
#' @export
#' @template args
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param marker (character) Returns all records containing matching marker
#' codes. See Details.
#' @param format (character) One of xml or tsv (default). tsv format gives
#' back a data.frame object. xml gives back parsed xml as an
#' \code{xml_document} object (see \code{xml2::read_xml}).
#' @param sepfasta (logical) If \code{TRUE}, the fasta data is separated into
#' a list with names matching the processid's from the data frame.
#' Default: \code{FALSE}
#'
#' @return Either a data.frame, parsed xml, a http response object, or a list
#' with length two (a data.frame w/o nucleotide data, and a list with
#' nucleotide data)
#'
#' @section Marker:
#' Notes from BOLD on the \code{marker} param:
#' "All markers for a specimen matching the search string will be returned.
#' ie. A record with COI-5P and ITS will return sequence data for both
#' markers even if only COI-5P was specified."
#'
#' You will likely end up with data with markers that you did not request -
#' just be sure to filter those out as needed.
#'
#' @examples \dontrun{
#' bold_seqspec(taxon='Osmia')
#' bold_seqspec(taxon='Osmia', format='xml')
#' bold_seqspec(taxon='Osmia', response=TRUE)
#' res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE)
#' res$fasta[1:2]
#' res$fasta['GBAH0293-06']
#'
#' # records that match a marker name
#' res <- bold_seqspec(taxon="Melanogrammus aeglefinus", marker="COI-5P")
#'
#' # records that match a geographic locality
#' res <- bold_seqspec(taxon="Melanogrammus aeglefinus", geo="Canada")
#'
#' ## curl debugging
#' ### You can do many things, including get verbose output on the curl call,
#' ### and set a timeout
#' head(bold_seqspec(taxon='Osmia', verbose = TRUE))
#' ## timeout
#' # head(bold_seqspec(taxon='Osmia', timeout_ms = 1))
#' }
bold_seqspec <- function(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
  institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
  response=FALSE, format = 'tsv', sepfasta=FALSE, ...) {
  # Query the combined (specimen + sequence) endpoint. Multi-valued search
  # terms are joined with "|"; unset terms are dropped from the query.
  format <- match.arg(format, choices = c('xml', 'tsv'))
  query <- bc(list(taxon = pipeornull(taxon), geo = pipeornull(geo),
    ids = pipeornull(ids), bin = pipeornull(bin),
    container = pipeornull(container),
    institutions = pipeornull(institutions),
    researchers = pipeornull(researchers),
    marker = pipeornull(marker), combined_download = format))
  check_args_given_nonempty(query, c('taxon', 'ids', 'bin', 'container',
    'institutions', 'researchers',
    'geo', 'marker'))
  out <- b_GET(paste0(bbase(), 'API_Public/combined'), query, ...)

  # Raw response requested: hand it back untouched.
  if (response) return(out)

  # Decode the body byte by byte and glue it back together; an empty body
  # is reported as NA.
  body <- paste0(rawToChar(out$content, multiple = TRUE), collapse = "")
  if (body == "") return(NA)

  parsed <- switch(
    format,
    xml = xml2::read_xml(body),
    tsv = utils::read.delim(text = body, header = TRUE, sep = "\t",
      stringsAsFactors = FALSE)
  )

  # Separating the sequences only applies to the tabular format; in every
  # other case the parsed object is the result.
  if (!sepfasta) return(parsed)
  if (format != "tsv") return(parsed)

  fasta <- as.list(parsed$nucleotides)
  names(fasta) <- parsed$processid
  without_seqs <- parsed[ , !names(parsed) %in% "nucleotides" ]
  list(data = without_seqs, fasta = fasta)
}
bold/R/bold_stats.R 0000644 0001762 0000144 00000003751 13134224120 013652 0 ustar ligges users #' Get BOLD stats
#'
#' @export
#' @inheritParams bold_specimens
#' @param dataType (character) one of "overview" or "drill_down" (default).
#' "drill_down": a detailed summary of information which provides record
#' counts by [BINs, Country, Storing Institution, Species]. "overview":
#' the total counts of [BINs, Countries, Storing Institutions, Orders,
#' Families, Genus, Species]
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @examples \dontrun{
#' x <- bold_stats(taxon='Osmia')
#' x$total_records
#' x$records_with_species_name
#' x$bins
#' x$countries
#' x$depositories
#' x$order
#' x$family
#' x$genus
#' x$species
#'
#' # just get all counts
#' lapply(Filter(is.list, x), "[[", "count")
#'
#' res <- bold_stats(taxon='Osmia', response=TRUE)
#' res$url
#' res$status_code
#' res$response_headers
#'
#' # More than 1 can be given for all search parameters
#' bold_stats(taxon=c('Coelioxys','Osmia'))
#'
#' ## curl debugging
#' ### These examples below take a long time, so you can set a timeout so that
#' ### it stops by X sec
#' bold_stats(taxon='Osmia', verbose = TRUE)
#' # bold_stats(geo='Costa Rica', timeout_ms = 6)
#' }
bold_stats <- function(taxon = NULL, ids = NULL, bin = NULL,
  container = NULL, institutions = NULL, researchers = NULL, geo = NULL,
  dataType = "drill_down", response=FALSE, ...) {
  # Build the query for the stats endpoint: multi-valued search terms are
  # joined with "|", unset terms are dropped, and JSON output is requested.
  query <- bc(list(taxon = pipeornull(taxon), geo = pipeornull(geo),
    ids = pipeornull(ids), bin = pipeornull(bin),
    container = pipeornull(container),
    institutions = pipeornull(institutions),
    researchers = pipeornull(researchers),
    dataType = dataType, format = "json"))
  check_args_given_nonempty(query, c('taxon','ids','bin','container',
    'institutions','researchers','geo'))
  res <- b_GET(paste0(bbase(), 'API_Public/stats'), query, ...)
  if (response) {
    # raw response object, unparsed
    res
  } else {
    jsonlite::fromJSON(res$parse("UTF-8"))
  }
}
bold/R/bold_tax_name.R 0000644 0001762 0000144 00000004077 13134212564 014323 0 ustar ligges users #' Search BOLD for taxonomy data by taxonomic name
#'
#' @export
#' @param name (character) One or more scientific names. required.
#' @param fuzzy (logical) Whether to use fuzzy search or not (default: FALSE).
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy}
#' @details The \code{dataTypes} parameter is not supported in this function.
#' If you want to use that parameter, get an ID from this function and pass
#' it into \code{bold_tax_id}, and then use the \code{dataTypes} parameter.
#' @seealso \code{\link{bold_tax_id}}
#' @examples \dontrun{
#' bold_tax_name(name='Diplura')
#' bold_tax_name(name='Osmia')
#' bold_tax_name(name=c('Diplura','Osmia'))
#' bold_tax_name(name=c("Apis","Puma concolor","Pinus concolor"))
#' bold_tax_name(name='Diplur', fuzzy=TRUE)
#' bold_tax_name(name='Osm', fuzzy=TRUE)
#'
#' ## get http response object only
#' bold_tax_name(name='Diplura', response=TRUE)
#' bold_tax_name(name=c('Diplura','Osmia'), response=TRUE)
#'
#' ## Names with no data in BOLD database
#' bold_tax_name("Nasiaeshna pentacantha")
#' bold_tax_name(name = "Cordulegaster erronea")
#' bold_tax_name(name = "Cordulegaster erronea", response=TRUE)
#'
#' ## curl debugging
#' bold_tax_name(name='Diplura', verbose = TRUE)
#' }
bold_tax_name <- function(name, fuzzy = FALSE, response = FALSE, ...) {
  # One request per element of `name` against the TaxonSearch endpoint;
  # `...` is forwarded to get_response().
  responses <- lapply(name, function(nm) {
    query <- bc(list(taxName = nm, fuzzy = if (fuzzy) 'true' else NULL))
    get_response(query, url = paste0(bbase(), "API_Tax/TaxonSearch"), ...)
  })
  # Raw response objects requested: skip parsing.
  if (response) return(responses)

  # Parse each response next to the name that produced it and stack the
  # rows, filling columns that are absent for some names.
  rows <- Map(process_tax_name, responses, name)
  stacked <- data.table::rbindlist(rows, use.names = TRUE, fill = TRUE)
  # setDF() returns invisibly; return through a named value so the result
  # prints at top level.
  result <- data.table::setDF(stacked)
  result
}
# Turn one TaxonSearch response into a data.frame.
#
# x: response object; `x$content` (raw body) and `x$status_code` are read.
# y: the name that was searched for; stored in the `input` column.
#
# Returns a data.frame holding only `input` when the status code is above
# 202 or the parsed body is empty, otherwise the `top_matched_names` table
# with `input` appended.
process_tax_name <- function(x, y) {
  tt <- rawToChar(x$content)
  # "stop" is a sentinel marking a failed request
  out <- if (x$status_code > 202) "stop" else jsonlite::fromJSON(tt, flatten = TRUE)
  # identical() rather than `out == "stop"`: for a successful response `out`
  # is a multi-element list, so `==` produced a length > 1 logical inside
  # `||`, which is an error from R 4.3 on.
  if (length(out) == 0 || identical(out[[1]], list()) || identical(out, "stop")) {
    data.frame(input = y, stringsAsFactors = FALSE)
  } else {
    data.frame(out$top_matched_names, input = y, stringsAsFactors = FALSE)
  }
}
bold/R/bold_identify_parents.R 0000644 0001762 0000144 00000006605 13134226537 016102 0 ustar ligges users #' Add taxonomic parent names to a data.frame
#'
#' @export
#' @param x (data.frame/list) list of data.frames - the output from a call to
#' \code{\link{bold_identify}}. or a single data.frame from the output from
#' same. required.
#' @param wide (logical) output in long or wide format. See Details.
#' Default: \code{FALSE}
#'
#' @details This function gets unique set of taxonomic names from the input
#' data.frame, then queries \code{\link{bold_tax_name}} to get the
#' taxonomic ID, passing it to \code{\link{bold_tax_id}} to get the parent
#' names, then attaches those to the input data.
#'
#' Records in the input data that do not have matches for parent names
#' simply get NA values in the added columns.
#'
#' @section wide vs long format:
#' When \code{wide = FALSE} you get many rows for each record. Essentially,
#' we \code{cbind} the taxonomic classification onto the one row from the
#' result of \code{\link{bold_identify}}, giving as many rows as there are
#' taxa in the taxonomic classification.
#'
#' When \code{wide = TRUE} you get one row for each record - thus the
#' dimensions of the input data stay the same. For this option, we take just
#' the rows for taxonomic ID and name for each taxon in the taxonomic
#' classification, and name the columns by the taxon rank, so you get
#' \code{phylum} and \code{phylum_id}, and so on.
#'
#' @return a list of the same length as the input
#'
#' @examples \dontrun{
#' df <- bold_identify(sequences = sequences$seq2)
#'
#' # long format
#' out <- bold_identify_parents(df)
#' str(out)
#' head(out[[1]])
#'
#' # wide format
#' out <- bold_identify_parents(df, wide = TRUE)
#' str(out)
#' head(out[[1]])
#' }
bold_identify_parents <- function(x, wide = FALSE) {
# S3 generic: dispatches on the class of `x`. Methods for list, data.frame
# and a default (which raises an error) are defined below in this file.
UseMethod("bold_identify_parents")
}
#' @export
bold_identify_parents.default <- function(x, wide = FALSE) {
  # Fallback for unsupported input: always raises an error naming the class.
  # Only the first class is reported; pasting the whole class vector ran
  # multi-class names together without a separator (e.g. "foobar").
  stop("no 'bold_identify_parents' method for ", class(x)[1L], call. = FALSE)
}
#' @export
bold_identify_parents.data.frame <- function(x, wide = FALSE) {
  # A single data.frame is handled as a one-element list, so the result is
  # whatever the generic returns for that list.
  wrapped <- list(x)
  bold_identify_parents(wrapped, wide = wide)
}
#' @export
bold_identify_parents.list <- function(x, wide = FALSE) {
  # x: list of data.frames; each is read for the columns
  #   `taxonomicidentification` and `ID`.
  # wide: if TRUE, the parent taxa are attached as one pair of columns per
  #   rank instead of one row per parent taxon.
  # Returns a list with one data.frame per element of `x`.

  # get unique set of names
  uniqnms <-
    unique(unname(unlist(lapply(x, function(z) z$taxonomicidentification))))
  if (is.null(uniqnms)) {
    stop("no fields 'taxonomicidentification' found in input", call. = FALSE)
  }

  # get parent names via bold_tax_name and bold_tax_id; names without a
  # `taxid` in the bold_tax_name() result yield NULL
  out <- stats::setNames(lapply(uniqnms, function(w) {
    tmp <- bold_tax_name(w)
    if (is.null(tmp$taxid)) return(NULL)
    tmp2 <- bold_tax_id(tmp$taxid, includeTree = TRUE)
    tmp2$input <- NULL
    tmp2
  }), uniqnms)
  # remove length zero elements
  out <- bc(out)

  if (wide) {
    # Replace each data.frame with a wide version with just taxid and taxon
    # name (with col names with rank name). This does not depend on the
    # element of `x` being processed, so it is done once here rather than
    # again for every element.
    out <- lapply(out, function(h) do.call("cbind", (apply(h, 1, function(rw) {
      # apply() passes each row as a character vector, hence the
      # conversion of taxid back to numeric
      tmp <- as.list(rw[c('taxid', 'taxon')])
      tmp$taxid <- as.numeric(tmp$taxid)
      data.frame(stats::setNames(tmp, paste0(rw['tax_rank'], c('_id', ''))),
        stringsAsFactors = FALSE)
    }))))
  }

  # apply parent names to input data
  lapply(x, function(z) {
    zsplit <- split(z, z$ID)
    setrbind(
      bc(lapply(zsplit, function(w) {
        tmp <- out[names(out) %in% w$taxonomicidentification]
        # no parent data for this record: keep it unchanged
        if (!length(tmp)) return(w)
        # NOTE(review): warnings from cbind() are deliberately silenced
        # here; the reason is not visible in this file.
        suppressWarnings(cbind(w, tmp[[1]]))
      }))
    )
  })
}
bold/R/zzz.R 0000644 0001762 0000144 00000005274 13134220517 012361 0 ustar ligges users bbase <- function() 'http://v4.boldsystems.org/index.php/'
# Drop the NULL elements of a list, keeping order and names of the rest.
bc <- function(x) {
  Filter(function(el) !is.null(el), x)
}
# Parse one FASTA record (the text that followed a ">" in the response)
# into its parts.
#
# x: character; the first line is the "|"-delimited header, the remaining
#    lines are the sequence.
#
# Returns list(id, name, gene, sequence): the first three header fields and
# the sequence with its line breaks removed. Header fields that are absent
# come back as NA.
#
# `gene` is now the third header field; it used to repeat the first field
# (the id), leaving the extracted third field unused. The header is also
# split off before it is cut on "|", so a short header can no longer pull
# sequence text into a field.
split_fasta <- function(x) {
  rec <- x[[1]]
  brk <- regexpr("\n", rec, fixed = TRUE)
  if (brk < 0) {
    # no line break at all: header only, no sequence
    header <- rec
    body <- ""
  } else {
    header <- substr(rec, 1L, brk - 1L)
    body <- substr(rec, brk + 1L, nchar(rec))
  }
  # tolerate CRLF line endings
  header <- sub("\r$", "", header)
  fields <- strsplit(header, "|", fixed = TRUE)[[1]]
  list(id = fields[1], name = fields[2], gene = fields[3],
    sequence = gsub("[\r\n]", "", body))
}
# Join the elements of `x` into one "|"-separated string; NULL stays NULL.
pipeornull <- function(x) {
  if (is.null(x)) return(NULL)
  paste0(x, collapse = "|")
}
# Validate a query list before it is sent.
#
# arguments: named list of query values.
# x: names of the search parameters, at least one of which must be present.
#
# Raises an error when none of the names in `x` occurs in `arguments`, or
# when any value other than `combined_download` is an empty string.
check_args_given_nonempty <- function(arguments, x){
  any_given <- any(x %in% names(arguments))
  has_empty <- FALSE
  if (any_given) {
    # the output format is not a search term, so it is left out of the check
    to_check <- arguments[ !names(arguments) %in% 'combined_download' ]
    widths <- vapply(to_check, nchar, numeric(1), USE.NAMES = FALSE)
    has_empty <- any(widths == 0)
  }
  if (!any_given || has_empty) {
    stop(sprintf("You must provide a non-empty value to at least one of\n %s",
      paste0(x, collapse = "\n ")))
  }
}
# Turn one TaxonData response into a data.frame.
#
# x: response object; `x$content` (raw body) and `x$status_code` are read.
# y: the identifier that was requested; stored in the `input` column.
# z: not referenced in the body (callers pass `includeTree`).
# w: the requested data type; for "stats", "images", "geo",
#    "sequencinglabs" and "depository" the first element of the parsed
#    body is used.
#
# Returns a data.frame holding only `input` when the status code is above
# 202 or the parsed body is empty, otherwise `input` followed by the
# parsed fields.
process_response <- function(x, y, z, w){
  tt <- rawToChar(x$content)
  # "stop" is a sentinel marking a failed request
  out <- if (x$status_code > 202) "stop" else jsonlite::fromJSON(tt)
  # identical() rather than `out == "stop"`: for a successful response `out`
  # is a multi-element list, so `==` produced a length > 1 logical inside
  # `||`, which is an error from R 4.3 on.
  if ( length(out) == 0 || identical(out[[1]], list()) || identical(out, "stop") ) {
    data.frame(input = y, stringsAsFactors = FALSE)
  } else {
    if (w %in% c("stats",'images','geo','sequencinglabs','depository')) out <- out[[1]]
    # When every element name converts to a number without a warning, the
    # names are dropped, which routes `out` to the per-element branch below.
    trynames <- tryCatch(as.numeric(names(out)), warning = function(cond) cond)
    if (!inherits(trynames, "simpleWarning")) names(out) <- NULL
    # Remove zero-length entries: inside each element when any element is a
    # non-empty list, otherwise at the top level.
    if (any(vapply(out, function(x) is.list(x) && length(x) > 0, logical(1)))) {
      out <- lapply(out, function(x) Filter(length, x))
    } else {
      out <- Filter(length, out)
    }
    if (!is.null(names(out))) {
      # named fields: a single record
      df <- data.frame(out, stringsAsFactors = FALSE)
    } else {
      # unnamed elements: one data.frame each, stacked with missing
      # columns filled
      df <- do.call(rbind.fill, lapply(out, data.frame, stringsAsFactors = FALSE))
    }
    row.names(df) <- NULL
    if ("parentid" %in% names(df)) df <- sort_df(df, "parentid")
    row.names(df) <- NULL
    data.frame(input = y, df, stringsAsFactors = FALSE)
  }
}
# GET `url` with query parameters `args` and return the response object.
#
# `...` is forwarded to the client's get() call. HTTP error statuses are
# raised by raise_for_status(); in addition the response must be declared
# as 'text/html; charset=utf-8'.
#
# The content type is read from `response_headers`, the same field b_GET()
# uses. The earlier check read `out$headers`, which is NULL on a crul
# response, so the comparison was empty and the assertion could never fail.
get_response <- function(args, url, ...){
  cli <- crul::HttpClient$new(url = url)
  out <- cli$get(query = args, ...)
  out$raise_for_status()
  ctype <- out$response_headers$`content-type`
  # compared case-insensitively so 'UTF-8' is accepted as well
  if (!identical(tolower(ctype), 'text/html; charset=utf-8')) {
    stop("unexpected content-type in response: ",
      if (length(ctype) > 0) paste(ctype, collapse = ", ") else "<none>",
      call. = FALSE)
  }
  return(out)
}
# GET `url` with query parameters `args` and return the response object.
#
# `...` is forwarded to the client's get() call. HTTP error statuses are
# raised by raise_for_status(). A response whose content type mentions
# "html" is treated as an error and its body becomes the error message.
b_GET <- function(url, args, ...){
  cli <- crul::HttpClient$new(url = url)
  out <- cli$get(query = args, ...)
  out$raise_for_status()
  ctype <- out$response_headers$`content-type`
  # A missing header makes grepl() return logical(0), which `if` rejects
  # with "argument is of length zero"; check the length first.
  if (length(ctype) > 0 && any(grepl("html", ctype))) {
    stop(out$parse("UTF-8"))
  }
  return(out)
}
bold/R/bold_seq.R 0000644 0001762 0000144 00000004277 13134212526 013317 0 ustar ligges users #' Search BOLD for sequences.
#'
#' Get sequences for a taxonomic name, id, bin, container, institution,
#' researcher, geographic place, or gene.
#'
#' @importFrom stringr str_replace_all str_replace str_split
#' @export
#' @template args
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param marker (character) Returns all records containing matching
#' marker codes.
#'
#' @return A list with each element of length 4 with slots for id, name,
#' gene, and sequence.
#'
#' @examples \dontrun{
#' res <- bold_seq(taxon='Coelioxys')
#' bold_seq(taxon='Aglae')
#' bold_seq(taxon=c('Coelioxys','Osmia'))
#' bold_seq(ids='ACRJP618-11')
#' bold_seq(ids=c('ACRJP618-11','ACRJP619-11'))
#' bold_seq(bin='BOLD:AAA5125')
#' bold_seq(container='ACRJP')
#' bold_seq(researchers='Thibaud Decaens')
#' bold_seq(geo='Ireland')
#' bold_seq(geo=c('Ireland','Denmark'))
#'
#' # Return the http response object for detailed Curl call response details
#' res <- bold_seq(taxon='Coelioxys', response=TRUE)
#' res$url
#' res$status_code
#' res$response_headers
#'
#' ## curl debugging
#' ### You can do many things, including get verbose output on the curl
#' ### call, and set a timeout
#' bold_seq(taxon='Coelioxys', verbose = TRUE)[1:2]
#' # bold_seqspec(taxon='Coelioxys', timeout_ms = 10)
#' }
bold_seq <- function(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
  institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
  response=FALSE, ...) {
  # Query the sequence endpoint. Multi-valued search terms are joined with
  # "|"; unset terms are dropped from the query.
  query <- bc(list(
    taxon = pipeornull(taxon), geo = pipeornull(geo),
    ids = pipeornull(ids), bin = pipeornull(bin),
    container = pipeornull(container),
    institutions = pipeornull(institutions),
    researchers = pipeornull(researchers), marker = pipeornull(marker)
  ))
  search_terms <- c('taxon','ids','bin','container','institutions','researchers',
    'geo','marker')
  check_args_given_nonempty(query, search_terms)
  out <- b_GET(paste0(bbase(), 'API_Public/sequence'), query, ...)

  # Raw response requested: hand it back untouched.
  if (response) return(out)

  # Each record starts with ">"; the piece before the first ">" is dropped
  # and every remaining piece is parsed on its own.
  records <- strsplit(out$parse("UTF-8"), ">")[[1]][-1]
  lapply(records, split_fasta)
}
bold/vignettes/ 0000755 0001762 0000144 00000000000 13134420302 013172 5 ustar ligges users bold/vignettes/bold_vignette.Rmd 0000644 0001762 0000144 00000047747 13134226657 016527 0 ustar ligges users
`bold` is an R package to connect to [BOLD Systems](http://www.boldsystems.org/) via their API. Functions in `bold` let you search for sequence data, specimen data, sequence + specimen data, and download raw trace files.
### bold info
+ [BOLD home page](http://boldsystems.org/)
+ [BOLD API docs](http://v4.boldsystems.org/index.php/api_home)
### Using bold
**Install**
Install `bold` from CRAN
```r
install.packages("bold")
```
Or install the development version from GitHub
```r
devtools::install_github("ropensci/bold")
```
Load the package
```r
library("bold")
```
### Search for taxonomic names via names
`bold_tax_name` searches for taxa by taxonomic name.
```r
bold_tax_name(name = 'Diplura')
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 Diplura 591238 Diplura order Animals 82 Insecta
#> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae
#> taxonrep
#> 1 Diplura
#> 2 <NA>
```
```r
bold_tax_name(name = c('Diplura', 'Osmia'))
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 Diplura 591238 Diplura order Animals 82 Insecta
#> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae
#> 3 Osmia 4940 Osmia genus Animals 4962 Megachilinae
#> taxonrep
#> 1 Diplura
#> 2 <NA>
#> 3 Osmia
```
### Search for taxonomic names via BOLD identifiers
`bold_tax_id` looks up taxa by their BOLD taxonomic identifiers.
```r
bold_tax_id(id = 88899)
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus genus Animals 88898 Momotidae
```
```r
bold_tax_id(id = c(88899, 125295))
#> input taxid taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus genus Animals 88898 Momotidae
#> 2 125295 125295 Helianthus genus Plants 100962 Asteraceae
```
### Search for sequence data only
The BOLD sequence API gives back sequence data, with a bit of metadata.
The default is to get a list back
```r
bold_seq(taxon = 'Coelioxys')[1:2]
#> [[1]]
#> [[1]]$id
#> [1] "FBAPB491-09"
#>
#> [[1]]$name
#> [1] "Coelioxys conica"
#>
#> [[1]]$gene
#> [1] "FBAPB491-09"
#>
#> [[1]]$sequence
#> [1] "---------------------ACCTCTTTAAGAATAATTATTCGTATAGAAATAAGAATTCCAGGATCTTGAATTAATAATGATCAAATTTATAACTCCTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATACCTTTTCTTATTGGAGGATTTGGAAATTGATTAGTACCTTTAATATTAGGATCACCAGATATAGCTTTCCCACGAATAAATAATATTAGATTTTGATTATTACCTCCTTCTTTATTAATATTATTATTAAGTAATTTAATAAATCCCAGACCAGGAACAGGCTGAACAGTTTATCCTCCTTTATCTTTATACACATACCACCCTTCTCCCTCAGTTGATTTAGCAATTTTTTCACTACATCTATCAGGAATCTCTTCTATTATTGGATCTATAAATTTTATTGTTACAATTTTAATAATAAAAAACTTTTCAATAAATTATAATCAAATACCATTATTCCCATGATCTATTTTAATTACTACTATTTTATTATTATTATCACTACCTGTATTAGCTGGTGCTATTACTATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCTATAGGAGGAGGAGACCCAATTTTATACCAACATTTA"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "FBAPC351-10"
#>
#> [[2]]$name
#> [1] "Coelioxys afra"
#>
#> [[2]]$gene
#> [1] "FBAPC351-10"
#>
#> [[2]]$sequence
#> [1] "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGAGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCATTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
```
You can optionally get back the `crul` response object
```r
res <- bold_seq(taxon = 'Coelioxys', response = TRUE)
res$response_headers
#> $date
#> [1] "Tue, 15 Sep 2015 20:02:31 GMT"
#>
#> $server
#> [1] "Apache/2.2.15 (Red Hat)"
#>
#> $`x-powered-by`
#> [1] "PHP/5.3.15"
#>
#> $`content-disposition`
#> [1] "attachment; filename=fasta.fas"
#>
#> $connection
#> [1] "close"
#>
#> $`transfer-encoding`
#> [1] "chunked"
#>
#> $`content-type`
#> [1] "application/x-download"
#>
#> attr(,"class")
#> [1] "insensitive" "list"
```
You can do geographic searches
```r
bold_seq(geo = "USA")
#> [[1]]
#> [[1]]$id
#> [1] "GBAN1777-08"
#>
#> [[1]]$name
#> [1] "Macrobdella decora"
#>
#> [[1]]$gene
#> [1] "GBAN1777-08"
#>
#> [[1]]$sequence
#> [1] "---------------------------------ATTGGAATCTTGTATTTCTTATTAGGTACATGATCTGCTATAGTAGGGACCTCTATA---AGAATAATTATTCGAATTGAATTAGCTCAACCTGGGTCGTTTTTAGGAAAT---GATCAAATTTACAATACTATTGTTACTGCTCATGGATTAATTATAATTTTTTTTATAGTAATACCTATTTTAATTGGAGGGTTTGGTAATTGATTAATTCCGCTAATA---ATTGGTTCTCCTGATATAGCTTTTCCACGTCTTAATAATTTAAGATTTTGATTACTTCCGCCATCTTTAACTATACTTTTTTGTTCATCTATAGTCGAAAATGGAGTAGGTACTGGATGGACTATTTACCCTCCTTTAGCAGATAACATTGCTCATTCTGGACCTTCTGTAGATATA---GCAATTTTTTCACTTCATTTAGCTGGTGCTTCTTCTATTTTAGGTTCATTAAATTTTATTACTACTGTAGTTAATATACGATGACCAGGGATATCTATAGAGCGAATTCCTTTATTTATTTGATCCGTAATTATTACTACTGTATTGCTATTATTATCTTTACCAGTATTAGCAGCT---GCTATTTCAATATTATTAACAGATCGTAACTTAAATACTAGATTTTTTGACCCAATAGGAGGAGGGGATCCTATTTTATTCCAACATTTATTTTGATTTTTTGGCCACCCTGAAGTTTATATTTTAATTTTACCAGGATTTGGAGCTATTTCTCATGTAGTAAGTCATAACTCT---AAAAAATTAGAACCGTTTGGATCATTAGGGATATTATATGCAATAATTGGAATTGCAATTTTAGGTTTTATTGTTTGAGCACATCATATATTTACAGTAGGTCTTGATGTAGATACACGAGCTTATTTTACAGCAGCTACAATAGTTATTGCTGTTCCTACAGGAATTAAAGTATTTAGGTGATTG---GCAACT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "GBAN1780-08"
#>
#> [[2]]$name
#> [1] "Haemopis terrestris"
#>
#> [[2]]$gene
#> [1] "GBAN1780-08"
#>
#> [[2]]$sequence
#> [1] "---------------------------------ATTGGAACWTTWTATTTTATTTTNGGNGCTTGATCTGCTATATTNGGGATCTCAATA---AGGAATATTATTCGAATTGAGCCATCTCAACCTGGGAGATTATTAGGAAAT---GATCAATTATATAATTCATTAGTAACAGCTCATGGATTAATTATAATTTTCTTTATGGTTATGCCTATTTTGATTGGTGGGTTTGGTAATTGATTACTACCTTTAATA---ATTGGAGCCCCTGATATAGCTTTTCCTCGATTAAATAATTTAAGTTTTTGATTATTACCACCTTCATTAATTATATTGTTAAGATCCTCTATTATTGAAAGAGGGGTAGGTACAGGTTGAACCTTATATCCTCCTTTAGCAGATAGATTATTTCATTCAGGTCCATCGGTAGATATA---GCTATTTTTTCATTACATATAGCTGGAGCATCATCTATTTTAGGCTCATTAAACTTTATTTCTACAATTATTAATATACGAATTAAAGGTATAAGATCTGATCGAGTACCTTTATTTGTATGATCAGTTGTTATTACAACAGTTCTGTTATTATTGTCTTTACCTGTTTTAGCTGCA---GCTATTACTATATTATTAACAGATCGTAATTTAAATACTACTTTTTTTGATCCTATAGGAGGTGGAGATCCAGTATTGTTTCAACACTTATTTTGATTTTTTGGTCATCCAGAAGTATATATTTTGATTTTACCAGGATTTGGAGCAATTTCTCATATTATTACAAATAATTCT---AAAAAATTGGAACCTTTTGGATCTCTTGGTATAATTTATGCTATAATTGGAATTGCAGTTTTAGGGTTTATTGTATGAGCCCATCATATATTTACTGTAGGATTAGATGTTGATACTCGAGCTTATTTTACAGCAGCTACTATAGTTATTGCTGTTCCTACTGGTATTAAAGTTTTTAGGTGATTA---GCAACA"
#>
#>
#> [[3]]
#> [[3]]$id
#> [1] "GBNM0293-06"
#>
#> [[3]]$name
#> [1] "Steinernema carpocapsae"
#>
#> [[3]]$gene
#> [1] "GBNM0293-06"
#>
#> [[3]]$sequence
#> [1] "---------------------------------------------------------------------------------ACAAGATTATCTCTTATTATTCGTTTAGAGTTGGCTCAACCTGGTCTTCTTTTGGGTAAT---GGTCAATTATATAATTCTATTATTACTGCTCATGCTATTCTTATAATTTTTTTCATAGTTATACCTAGAATAATTGGTGGTTTTGGTAATTGAATATTACCTTTAATATTGGGGGCTCCTGATATAAGTTTTCCACGTTTGAATAATTTAAGTTTTTGATTGCTACCAACTGCTATATTTTTGATTTTAGATTCTTGTTTTGTTGACACTGGTTGTGGTACTAGTTGAACTGTTTATCCTCCTTTGAGG---ACTTTAGGTCACCCTGGYAGAAGTGTAGATTTAGCTATTTTTAGTCTTCATTGTGCAGGAATTAGCTCAATTTTAGGGGCTATTAATTTTATATGTACTACAAAAAATCTTCGTAGTAGTTCTATTTCTTTGGAACATATAAGACTTTTTGTTTGGGCTGTTTTTGTTACTGTTTTTTTATTAGTTTTATCTTTACCTGTTTTAGCTGGTGCTATTACTATGCTTTTAACAGACCGTAATTTAAATACTTCTTTTTTT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
#>
#>
#> [[4]]
#> [[4]]$id
#> [1] "NEONV108-11"
#>
#> [[4]]$name
#> [1] "Aedes thelcter"
#>
#> [[4]]$gene
#> [1] "NEONV108-11"
#>
#> [[4]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGATCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGATCCAATTGGAGGAGGAGACCCAATTTTATACCAACATTTATTT"
#>
#>
#> [[5]]
#> [[5]]$id
#> [1] "NEONV109-11"
#>
#> [[5]]$name
#> [1] "Aedes thelcter"
#>
#> [[5]]$gene
#> [1] "NEONV109-11"
#>
#> [[5]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGGTCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGACCCAATTGGAGGGGGAGACCCAATTTTATACCAACATTTATTT"
```
And you can search by researcher name
```r
bold_seq(researchers = 'Thibaud Decaens')[[1]]
#> $id
#> [1] "BGABA657-14"
#>
#> $name
#> [1] "Coleoptera"
#>
#> $gene
#> [1] "BGABA657-14"
#>
#> $sequence
#> [1] "ACACTCTATTTCATTTTCGGAGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATACTAATTCGATCTGAATTGGGAAACCCCGGCTCATTGATTGGGGATGATCAAATTTATAATGTTATTGTAACAGCCCATGCATTCATTATAATTTTTTTTATAGTAATACCGATCATAATAGGAGGTTTTGGAAATTGATTAGTCCCGCTAATATTAGGTGCCCCAGATATAGCATTTCCTCGAATAAATAATATAAGATTTTGACTTCTTCCGCCTTCATTAACTTTACTTATTATAAGAAGAATTGTAGAAAACGGGGCGGGAACAGGATGAACAGTTTACCCACCCCTCTCTTCTAACATTGCTCATAGAGGAGCCTCTGTAGATCTTGCAATTTTTAGATTACATTTAGCCGGTGTATCATCAATTTTAGGTGCAGTTAATTTTATTACAACTATTATTAATATACGACCTAAAGGAATAACATTTGATCGCATACCTTTATTTGTATGAGCTGTAGCTTTAACTGCATTACTTTTATTATTATCTTTACCAGTATTAGCAGGTGCAATTACAATACTTTTAACTGATCGA---------------------------------------"
```
by record identifiers (for example process IDs)
```r
bold_seq(ids = c('ACRJP618-11', 'ACRJP619-11'))
#> [[1]]
#> [[1]]$id
#> [1] "ACRJP618-11"
#>
#> [[1]]$name
#> [1] "Lepidoptera"
#>
#> [[1]]$gene
#> [1] "ACRJP618-11"
#>
#> [[1]]$sequence
#> [1] "------------------------TTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "ACRJP619-11"
#>
#> [[2]]$name
#> [1] "Lepidoptera"
#>
#> [[2]]$gene
#> [1] "ACRJP619-11"
#>
#> [[2]]$sequence
#> [1] "AACTTTATATTTTATTTTTGGTATTTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
```
by container (containers include project codes and dataset codes)
```r
bold_seq(container = 'ACRJP')[[1]]
#> $id
#> [1] "ACRJP003-09"
#>
#> $name
#> [1] "Lepidoptera"
#>
#> $gene
#> [1] "ACRJP003-09"
#>
#> $sequence
#> [1] "AACATTATATTTTATTTTTGGGATCTGATCTGGAATAGTAGGGACATCTTTAAGTATACTAATTCGAATAGAACTAGGAAATCCTGGATGTTTAATTGGGGATGATCAAATTTATAATACTATTGTTACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCCATTATAATTGGAGGTTTTGGCAATTGACTTGTACCATTAATATTAGGAGCCCCTGATATAGCATTTCCCCGAATAAATAATATAAGATTTTGACTTCTTCCCCCCTCATTAATTTTATTAATTTCAAGAAGAATTGTTGAAAATGGAGCAGGAACAGGATGAACAGTCTATCCTCCATTATCTTCTAATATTGCGCATAGAGGATCCTCTGTTGATTTAGCTATTTTCTCACTTCATTTAGCAGGAATTTCTTCTATTTTAGGAGCAATTAATTTTATTACAACTATTATTAATATACGAATAAATAATTTACTTTTTGACCAAATACCTCTATTTGTTTGAGCAGTAGGTATTACAGCTGTTCTTCTTTTATTATCATTACCAGTATTAGCAGGAGCAATTACCATACTATTAACAGATCGTAATTTAAATACTTCTTTCTTTGATCCTGCTGGAGGAGGAGATCCAATTTTATACCAACATTTATTT"
```
by bin (a bin is a _Barcode Index Number_)
```r
bold_seq(bin = 'BOLD:AAA5125')[[1]]
#> $id
#> [1] "BLPAB406-06"
#>
#> $name
#> [1] "Eacles ormondei"
#>
#> $gene
#> [1] "BLPAB406-06"
#>
#> $sequence
#> [1] "AACTTTATATTTTATTTTTGGAATTTGAGCAGGTATAGTAGGAACTTCTTTAAGATTACTAATTCGAGCAGAATTAGGTACCCCCGGATCTTTAATTGGAGATGACCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGATTAGTACCCCTAATACTAGGAGCTCCTGATATAGCTTTCCCCCGAATAAATAATATAAGATTTTGACTATTACCCCCATCTTTAACTCTTTTAATTTCTAGAAGAATTGTCGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCCCTTTCATCTAATATTGCTCATGGAGGCTCTTCTGTTGATTTAGCTATTTTTTCCCTTCATCTAGCTGGAATCTCATCAATTTTAGGAGCTATTAATTTTATCACAACAATCATTAATATACGACTAAATAATATAATATTTGACCAAATACCTTTATTTGTATGAGCTGTTGGTATTACAGCATTTCTTTTATTGTTATCTTTACCTGTACTAGCTGGAGCTATTACTATACTTTTAACAGATCGAAACTTAAATACATCATTTTTTGACCCAGCAGGAGGAGGAGATCCTATTCTCTATCAACATTTATTT"
```
There are more ways to query; check out the docs with `?bold_seq`.
### Search for specimen data only
The BOLD specimen API doesn't give back sequences, only specimen data. By default you download `tsv` format data, which is given back to you as a `data.frame`
```r
res <- bold_specimens(taxon = 'Osmia')
head(res[,1:8])
#> processid sampleid recordID catalognum fieldnum
#> 1 ASGCB261-13 BIOUG07489-F10 3955538 BIOUG07489-F10
#> 2 BCHYM1499-13 BC ZSM HYM 19359 4005348 BC ZSM HYM 19359 BC ZSM HYM 19359
#> 3 BCHYM412-13 BC ZSM HYM 18272 3896353 BC ZSM HYM 18272 BC ZSM HYM 18272
#> 4 BCHYM413-13 BC ZSM HYM 18273 3896354 BC ZSM HYM 18273 BC ZSM HYM 18273
#> 5 FBAPB706-09 BC ZSM HYM 02181 1289067 BC ZSM HYM 02181 BC ZSM HYM 02181
#> 6 FBAPB730-09 BC ZSM HYM 02205 1289091 BC ZSM HYM 02205 BC ZSM HYM 02205
#> institution_storing bin_uri phylum_taxID
#> 1 Biodiversity Institute of Ontario BOLD:AAB8874 20
#> 2 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAD6282 20
#> 3 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20
#> 4 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20
#> 5 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAE4126 20
#> 6 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAK5820 20
```
You can optionally get back the data in `XML` format
```r
bold_specimens(taxon = 'Osmia', format = 'xml')
```
```r
1470124
BOM1525-10
BOLD:AAN3337
DHB 1011
DHB 1011
DHB1011
Marjorie Barrick Museum
```
You can choose to get the `httr` response object back if you'd rather work with the raw data returned from the BOLD API.
```r
res <- bold_specimens(taxon = 'Osmia', format = 'xml', response = TRUE)
res$url
#> [1] "http://v4.boldsystems.org/index.php/API_Public/specimen?taxon=Osmia&format=xml"
res$status_code
#> [1] 200
res$headers
#> NULL
```
### Search for specimen plus sequence data
The specimen/sequence combined API gives back specimen and sequence data. Like the specimen API, this one gives by default `tsv` format data, which is given back to you as a `data.frame`. Here, we're setting `sepfasta=TRUE` so that the sequence data is given back as a list, and taken out of the `data.frame` returned so the `data.frame` is more manageable.
```r
res <- bold_seqspec(taxon = 'Osmia', sepfasta = TRUE)
res$fasta[1:2]
#> $`ASGCB261-13`
#> [1] "AATTTTATATATAATTTTTGCTATATGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCTGGTTCATGAATTTCAAATGATCAAACTTATAATTCTTTAGTTACTGCTCATGCTTTTTTAATAATTTTTTTCTTAGTTATACCATTCTTAATTGGGGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATACTTTTATTATTAAGAAATTTTATAAATCCTAGTCCAGGAACTGGATGAACTGTTTATCCACCTTTATCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAACATATTCAATTACCTTTATTTCCTTGATCTGTCTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCAGGTGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCTACAGGAGGAGGAGATCCTATTCTTTATCAACATTTATTT"
#>
#> $`BCHYM1499-13`
#> [1] "AATTCTTTACATAATTTTTGCTTTATGATCTGGAATAATTGGGTCAGCAATAAGAATTATTATTCGAATAGAATTAAGTATCCCAGGTTCATGAATTACTAATGATCAAATTTATAATTCTTTAGTAACTGCACATGCTTTTTTAATAATTTTTTTTCTTGTGATACCATTTTTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCTTTCCCACGAATAAACAATATTAGATTTTGATTATTACCGCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCCCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTGCCTTTATTTCCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTGTTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCTACAGGAGGAGGAGATCCAATTCTTTATCAACATTTATTT"
```
Or you can index to a specific sequence like
```r
res$fasta['GBAH0293-06']
#> $`GBAH0293-06`
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
```
### Get trace files
This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information.
```r
bold_trace(taxon = 'Osmia', quiet = TRUE)
```
bold/README.md 0000644 0001762 0000144 00000022007 13134223100 012437 0 ustar ligges users bold
====
[Project Status: Active](http://www.repostatus.org/#active)

`bold` accesses BOLD barcode data.
The Barcode of Life Data Systems (BOLD) is designed to support the generation and application of DNA barcode data. The platform consists of four main modules: a data portal, a database of barcode clusters, an educational portal, and a data collection workbench.
This package retrieves data from the BOLD database of barcode clusters, and allows for searching of over 1.7M public records using multiple search criteria including sequence data, specimen data, specimen *plus* sequence data, as well as trace files.
[Documentation for the BOLD API](http://v4.boldsystems.org/index.php/api_home).
## Package status and installation
[AppVeyor build status](https://ci.appveyor.com/project/ropensci/bold)
[Travis CI build status](https://travis-ci.org/)
[Code coverage (codecov)](https://codecov.io/github/ropensci/bold?branch=master)
[CRAN downloads (cranlogs)](https://github.com/metacran/cranlogs.app)
__Installation instructions__
__Stable Version__
```r
install.packages("bold")
```
__Development Version__
Install `sangerseqR` first
```r
source("http://bioconductor.org/biocLite.R")
biocLite("sangerseqR")
```
Then `bold`
```r
devtools::install_github("ropensci/bold")
```
## Usage
```r
library("bold")
```
### Search for sequence data only
Default is to get a list back
```r
bold_seq(taxon='Coelioxys')[[1]]
#> $id
#> [1] "BBHYL404-10"
#>
#> $name
#> [1] "Coelioxys rufitarsis"
#>
#> $gene
#> [1] "BBHYL404-10"
#>
#> $sequence
#> [1] "TATAATATATATAATTTTTGCAATATGATCAGGTATAATTGGATCATCTTTAAGAATAATTATTCGAATAGAATTAAGAATCCCAGGTTCATGAATTAGAAATGATCAAATTTATAATTCTTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATGCCTTTTCTAATTGGGGGATTTGGTAATTGATTAACACCATTAATACTTGGAGCTCCTGATATAGCTTTCCCCCGAATAAACAATATTAGATTTTGACTACTCCCACCTTCTTTATTACTTTTATTATCAAGAAATTTAATTAATCCAAGACCAGGAACAGGATGAACTGTTTATCCACCATTATCCTCTTATACATATCATCCATCTCCTTCTGTAGATTTAGCAATTTTTTCTTTACATTTATCAGGAATTTCCTCAATTATTGGATCAATAAATTTTATTGTTACAATTTTAATAATAAAAAATTATTCAATAAATTATAATCAAATACCATTATTCCCATGATCAGTTTTAATTACTACAATTTTATTATTACTATCACTTCCAGTATTAGCAGGAGCAATTACAATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCAATAGGAGGAGGAGACCCAATTTTATATCAACATTTATTT\r"
```
You can optionally get back the `httr` response object
```r
res <- bold_seq(taxon='Coelioxys', response=TRUE)
res$response_headers
#> $status
#> [1] "HTTP/1.1 200 OK"
#>
#> $date
#> [1] "Thu, 20 Jul 2017 21:51:40 GMT"
#>
#> $server
#> [1] "Apache/2.2.15 (Red Hat)"
#>
#> $`x-powered-by`
#> [1] "PHP/5.3.15"
#>
#> $`content-disposition`
#> [1] "attachment; filename=fasta.fas"
#>
#> $connection
#> [1] "close"
#>
#> $`transfer-encoding`
#> [1] "chunked"
#>
#> $`content-type`
#> [1] "application/x-download"
```
### Search for specimen data only
By default you download `tsv` format data, which is given back to you as a `data.frame`
```r
res <- bold_specimens(taxon='Osmia')
head(res[,1:8])
#> processid sampleid recordID catalognum fieldnum
#> 1 ASGCB255-13 BIOUG07489-F04 3955532 BIOUG07489-F04
#> 2 ASGCB258-13 BIOUG07489-F07 3955535 BIOUG07489-F07
#> 3 BBHYA3298-12 BIOUG02688-A06 2711807 BIOUG02688-A06 L#11BIOBUS-2558
#> 4 BBHYL310-10 10BBCHY-3264 1769753 10BBCHY-3264 L#PC2010KT-025
#> 5 BCHYM1496-13 BC ZSM HYM 19356 4005345 BC ZSM HYM 19356 BC ZSM HYM 19356
#> 6 BCHYM412-13 BC ZSM HYM 18272 3896353 BC ZSM HYM 18272 BC ZSM HYM 18272
#> institution_storing collection_code
#> 1 Biodiversity Institute of Ontario NA
#> 2 Biodiversity Institute of Ontario NA
#> 3 University of Guelph, Centre for Biodiversity Genomics NA
#> 4 University of Guelph, Centre for Biodiversity Genomics NA
#> 5 SNSB, Zoologische Staatssammlung Muenchen NA
#> 6 SNSB, Zoologische Staatssammlung Muenchen NA
#> bin_uri
#> 1 BOLD:ABZ2181
#> 2 BOLD:AAC0884
#> 3 BOLD:ACF5858
#> 4 BOLD:AAC3295
#> 5 BOLD:AAI2010
#> 6 BOLD:AAP2416
```
### Search for specimen plus sequence data
By default you download `tsv` format data, which is given back to you as a `data.frame`
```r
res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE)
res$fasta[1:2]
#> $`ASGCB255-13`
#> [1] "-------------------------------GGAATAATTGGTTCTGCTATAAGTATTATTATTCGAATAGAATTAAGAATTCCTGGATCATTCATTTCTAATGATCAAACTTATAATTCTTTAGTAACAGCTCATGCTTTTTTAATAATTTTTTTTCTTGTAATACCATTTTTAATTGGTGGATTTGGAAATTGATTAATTCCATTAATATTAGGAATCCCAGATATAGCATTTCCTCGAATAAATAATATTAGATTTTGACTTTTACCCCCATCCTTAATAATTTTACTTTTAAGAAATTTCTTAAATCCAAGTCCAGGAACAGGTTGAACTGTATATCCCCCCCTTTCTTCTTATTTATTTCATTCTTCCCCTTCTGTTGATTTAGCTATTTTTTCTCTTCATATTTCTGGTTTATCTTCCATCATAGGTTCTTTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCATTAAAACATATTCAATTACCTTTATTTCCTTGATCCGTTTTTATTACAACTATTTTACTATTATTTTCTTTACCTGTTCTAGCAGGAGCTATTACTATATTATTATTTGATCGAAACTTTAATACTTCATTTTTTGATCCAACTGGAGGAGGAGATCCAATTTTATATCAACATTTATTC"
#>
#> $`ASGCB258-13`
#> [1] "GATTTTATATATAATTTTTGCTATGTGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCAGGTTCATGAATCTCTAATGATCAAATTTATAATTCTTTAGTTACTGCTCACGCTTTTTTAATAATTTTTTTTTTAGTAATACCATTTTTAATTGGAGGATTTGGTAATTGATTAGTTCCATTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATGTTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCAGGAACTGGATGAACTGTATATCCTCCTCTTTCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCATTAAAACATATTCAATTGCCTTTATTTCCTTGATCTGTTTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCTGGAGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCGACAGGAGGTGGAGATCCAATTCTTTATCAACATTTATTT"
```
Or you can index to a specific sequence like
```r
res$fasta['GBAH0293-06']
#> $`GBAH0293-06`
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
```
### Get trace files
This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information.
```r
x <- bold_trace(ids = 'ACRJP618-11', progress = FALSE)
read_trace(x$ab1)
#> Number of datapoints: 8877
#> Number of basecalls: 685
#>
#> Primary Basecalls: NNNNNNNNNNNNNNNNNNGNNNTTGAGCAGGNATAGTAGGANCTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTTTGATTTTTTGGACNTCNNNNAAGTTTAAN
#>
#> Secondary Basecalls:
```
## Citation
Get citation information for `bold` in R by running: `citation(package = 'bold')`
## Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md).
By participating in this project you agree to abide by its terms.
[rOpenSci](https://ropensci.org)
bold/MD5 0000644 0001762 0000144 00000004265 13134504041 011504 0 ustar ligges users d9f0d0f86c57ec088ae6e2ccc75ba3ee *DESCRIPTION
c5af52351472a750055a760a8924ce71 *LICENSE
19dc97bc26681640044166206b3e8b26 *NAMESPACE
a143b032c7bab4a5641065d39bcde2f2 *NEWS.md
bca22309792f9e1bef76921cb712c956 *R/bold-package.R
86af3511a9fb16be45bd4a9aa65ec5bf *R/bold_filter.R
64f2b2e4baf7806e6c1882330e97e948 *R/bold_identify.R
f7575812735ace5257d663c64eff23c3 *R/bold_identify_parents.R
7d476be15469458fb3490e5f6751b2a5 *R/bold_seq.R
34757525c62643946a13bbdc88a2c499 *R/bold_seqspec.R
c04bfe652c63063579726cb9ac9b72d3 *R/bold_specimens.R
fb4a5c2495a64b58d9dbb930953926aa *R/bold_stats.R
5b1e9f8a6f26df5e328a32033c276ed2 *R/bold_tax_id.R
cbf8ecafcc3b5d2943be20908afa6b16 *R/bold_tax_name.R
3725c7e5034c9054eba152b29dab6a67 *R/bold_trace.R
c71c8a8fa5c0f0d39b74fb77e29783e9 *R/zzz.R
d3211d9455722a179f4b474123d260f2 *README.md
d24146de7dd680956b18b29d27d96e36 *build/vignette.rds
bb64a460c31e2e6821ac53870b09c38e *data/sequences.RData
4e8e66850d376db54b734f6459de4546 *inst/doc/bold_vignette.Rmd
83e40e7100860c0b34b9ad2259c9a05c *inst/doc/bold_vignette.html
1fc13c735a05dc20167294991f30e259 *man/bold-package.Rd
7025c073016140b45f74341a683a2c90 *man/bold_filter.Rd
2fc7738893d734266d23d2f0067b629c *man/bold_identify.Rd
bd4d479f09995ce3b37ed0e32a9c5a5a *man/bold_identify_parents.Rd
309ebec7ac23da6ba0bded7c985a71be *man/bold_seq.Rd
6ec8abe164beeb79ca6f2b2de7b31d3c *man/bold_seqspec.Rd
bd3feb5a07142dcd69faec4333cb9b41 *man/bold_specimens.Rd
ecdb0aa85bd247e984a4ca58b8530c42 *man/bold_stats.Rd
53b9c4a880cfa10f17aeb5e04f644d07 *man/bold_tax_id.Rd
ae740ce75ebf5f8118c443e6c1715a76 *man/bold_tax_name.Rd
302ca05fd77797d50f25d36149283649 *man/bold_trace.Rd
8b6eac4da649615fee64522edaf3bf0b *man/sequences.Rd
d9066883a8fecb16e80ceeef8323edac *tests/test-all.R
acc0cd3d6511852edf1c5b919b824f97 *tests/testthat/test-bold_identify.R
07443ab368120fb6f9e18e8a00323b9a *tests/testthat/test-bold_seq.R
c061b818529a0d00d7dab35fa77fea77 *tests/testthat/test-bold_seqspec.R
bf5e0d5ce0c5fd2201c8ccc261f88ca6 *tests/testthat/test-bold_specimens.R
670498c2dfc92d737a1fd81be132f464 *tests/testthat/test-bold_tax_id.R
2a174b1e7e11a116070defb4ac5fb4f4 *tests/testthat/test-bold_tax_name.R
4e8e66850d376db54b734f6459de4546 *vignettes/bold_vignette.Rmd
bold/build/ 0000755 0001762 0000144 00000000000 13134420302 012261 5 ustar ligges users bold/build/vignette.rds 0000644 0001762 0000144 00000000312 13134420302 014614 0 ustar ligges users b```b`fad`b2 1#'LI/LK-)IMASR S&);$7M `>DXYsS4楀aMwjey~L6̜T!%psQY_/ȷ
@?{49'ݣ\)%ziE@ w { bold/DESCRIPTION 0000644 0001762 0000144 00000002055 13134504041 012675 0 ustar ligges users Package: bold
Title: Interface to Bold Systems API
Description: A programmatic interface to the Web Service methods provided by
Bold Systems (<http://www.boldsystems.org/>) for genetic 'barcode' data.
Functions include methods for searching for sequences by taxonomic names,
ids, collectors, and institutions; as well as a function for searching
for specimens, and downloading trace files.
Version: 0.5.0
License: MIT + file LICENSE
Authors@R: c(person("Scott", "Chamberlain", role = c("aut", "cre"),
email = "myrmecocystus@gmail.com"))
URL: https://github.com/ropensci/bold
BugReports: https://github.com/ropensci/bold/issues
VignetteBuilder: knitr
LazyData: yes
Imports: xml2, crul (>= 0.3.8), stringr, jsonlite, reshape, plyr,
data.table, tibble
Suggests: roxygen2 (>= 6.0.1), sangerseqR, knitr, testthat
RoxygenNote: 6.0.1
NeedsCompilation: no
Packaged: 2017-07-21 15:41:54 UTC; sacmac
Author: Scott Chamberlain [aut, cre]
Maintainer: Scott Chamberlain <myrmecocystus@gmail.com>
Repository: CRAN
Date/Publication: 2017-07-21 23:02:56 UTC
bold/man/ 0000755 0001762 0000144 00000000000 13134207626 011751 5 ustar ligges users bold/man/bold_tax_name.Rd 0000644 0001762 0000144 00000003327 13134212756 015041 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_tax_name.R
\name{bold_tax_name}
\alias{bold_tax_name}
\title{Search BOLD for taxonomy data by taxonomic name}
\usage{
bold_tax_name(name, fuzzy = FALSE, response = FALSE, ...)
}
\arguments{
\item{name}{(character) One or more scientific names. required.}
\item{fuzzy}{(logical) Whether to use fuzzy search or not (default: FALSE).}
\item{response}{(logical) Note that response is the object that returns
from the Curl call, useful for debugging, and getting detailed info on
the API call.}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main
purpose being curl debugging}
}
\description{
Search BOLD for taxonomy data by taxonomic name
}
\details{
The \code{dataTypes} parameter is not supported in this function.
If you want to use that parameter, get an ID from this function and pass
it into \code{bold_tax_id}, and then use the \code{dataTypes} parameter.
}
\examples{
\dontrun{
bold_tax_name(name='Diplura')
bold_tax_name(name='Osmia')
bold_tax_name(name=c('Diplura','Osmia'))
bold_tax_name(name=c("Apis","Puma concolor","Pinus concolor"))
bold_tax_name(name='Diplur', fuzzy=TRUE)
bold_tax_name(name='Osm', fuzzy=TRUE)
## get http response object only
bold_tax_name(name='Diplura', response=TRUE)
bold_tax_name(name=c('Diplura','Osmia'), response=TRUE)
## Names with no data in BOLD database
bold_tax_name("Nasiaeshna pentacantha")
bold_tax_name(name = "Cordulegaster erronea")
bold_tax_name(name = "Cordulegaster erronea", response=TRUE)
## curl debugging
bold_tax_name(name='Diplura', verbose = TRUE)
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy}
}
\seealso{
\code{\link{bold_tax_id}}
}
bold/man/bold_filter.Rd 0000644 0001762 0000144 00000002374 13134226557 014537 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_filter.R
\name{bold_filter}
\alias{bold_filter}
\title{Filter BOLD specimen + sequence data (output of bold_seqspec).}
\usage{
bold_filter(x, by, how = "max")
}
\arguments{
\item{x}{(data.frame) a data.frame, as returned from
\code{\link{bold_seqspec}}. Note that some combinations of parameters
in \code{\link{bold_seqspec}} don't return a data.frame. Stops with
error message if this is not a data.frame. Required.}
\item{by}{(character) the column by which to group. For example,
if you want the longest sequence for each unique species name, then
pass \strong{species_name}. If the column doesn't exist, error
with message saying so. Required.}
\item{how}{(character) one of "max" or "min", which get used as
\code{which.max} or \code{which.min} to get the longest or shortest
sequence, respectively. Note that we remove gap/alignment characters
(\code{-})}
}
\value{
a tibble/data.frame
}
\description{
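Filter BOLD specimen + sequence data, as returned from
\code{\link{bold_seqspec}}, keeping the longest or shortest sequence
within each group.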
}
\examples{
\dontrun{
res <- bold_seqspec(taxon='Osmia')
maxx <- bold_filter(res, by = "species_name")
minn <- bold_filter(res, by = "species_name", how = "min")
vapply(maxx$nucleotides, nchar, 1, USE.NAMES = FALSE)
vapply(minn$nucleotides, nchar, 1, USE.NAMES = FALSE)
}
}
bold/man/bold_seq.Rd 0000644 0001762 0000144 00000005251 13134220504 014021 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_seq.R
\name{bold_seq}
\alias{bold_seq}
\title{Search BOLD for sequences.}
\usage{
bold_seq(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
response = FALSE, ...)
}
\arguments{
\item{taxon}{(character) Returns all records containing matching taxa. Taxa
includes the ranks of phylum, class, order, family, subfamily, genus,
and species.}
\item{ids}{(character) Returns all records containing matching IDs. IDs
include Sample IDs, Process IDs, Museum IDs and Field IDs.}
\item{bin}{(character) Returns all records contained in matching BINs. A
BIN is defined by a Barcode Index Number URI.}
\item{container}{(character) Returns all records contained in matching
projects or datasets. Containers include project codes and dataset codes}
\item{institutions}{(character) Returns all records stored in matching
institutions. Institutions are the Specimen Storing Site.}
\item{researchers}{(character) Returns all records containing matching
researcher names. Researchers include collectors and specimen identifiers.}
\item{geo}{(character) Returns all records collected in matching geographic
sites. Geographic sites includes countries and province/states.}
\item{marker}{(character) Returns all records containing matching
marker codes.}
\item{response}{(logical) Note that response is the object that returns
from the Curl call, useful for debugging, and getting detailed info on
the API call.}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main
purpose being curl debugging}
}
\value{
A list with each element of length 4 with slots for id, name,
gene, and sequence.
}
\description{
Get sequences for a taxonomic name, id, bin, container, institution,
researcher, geographic place, or gene.
}
\examples{
\dontrun{
res <- bold_seq(taxon='Coelioxys')
bold_seq(taxon='Aglae')
bold_seq(taxon=c('Coelioxys','Osmia'))
bold_seq(ids='ACRJP618-11')
bold_seq(ids=c('ACRJP618-11','ACRJP619-11'))
bold_seq(bin='BOLD:AAA5125')
bold_seq(container='ACRJP')
bold_seq(researchers='Thibaud Decaens')
bold_seq(geo='Ireland')
bold_seq(geo=c('Ireland','Denmark'))
# Return the http response object for detailed Curl call response details
res <- bold_seq(taxon='Coelioxys', response=TRUE)
res$url
res$status_code
res$response_headers
## curl debugging
### You can do many things, including get verbose output on the curl
### call, and set a timeout
bold_seq(taxon='Coelioxys', verbose = TRUE)[1:2]
# bold_seqspec(taxon='Coelioxys', timeout_ms = 10)
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
}
bold/man/bold_identify_parents.Rd 0000644 0001762 0000144 00000003472 13134226557 016621 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_identify_parents.R
\name{bold_identify_parents}
\alias{bold_identify_parents}
\title{Add taxonomic parent names to a data.frame}
\usage{
bold_identify_parents(x, wide = FALSE)
}
\arguments{
\item{x}{(data.frame/list) list of data.frames - the output from a call to
\code{\link{bold_identify}} - or a single data.frame from that output.
Required.}
\item{wide}{(logical) output in long or wide format. See Details.
Default: \code{FALSE}}
}
\value{
a list of the same length as the input
}
\description{
Add taxonomic parent names to a data.frame
}
\details{
This function gets the unique set of taxonomic names from the input
data.frame, then queries \code{\link{bold_tax_name}} to get the
taxonomic ID, passing it to \code{\link{bold_tax_id}} to get the parent
names, then attaches those to the input data.
Records in the input data that do not have matches for parent names
simply get NA values in the added columns.
}
\section{wide vs long format}{
When \code{wide = FALSE} you get many rows for each record. Essentially,
we \code{cbind} the taxonomic classification onto the one row from the
result of \code{\link{bold_identify}}, giving as many rows as there are
taxa in the taxonomic classification.
When \code{wide = TRUE} you get one row for each record - thus the
dimensions of the input data stay the same. For this option, we take just
the rows for taxonomic ID and name for each taxon in the taxonomic
classification, and name the columns by the taxon rank, so you get
\code{phylum} and \code{phylum_id}, and so on.
}
\examples{
\dontrun{
df <- bold_identify(sequences = sequences$seq2)
# long format
out <- bold_identify_parents(df)
str(out)
head(out[[1]])
# wide format
out <- bold_identify_parents(df, wide = TRUE)
str(out)
head(out[[1]])
}
}
bold/man/sequences.Rd 0000644 0001762 0000144 00000000705 13121306447 014232 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold-package.R
\docType{data}
\name{sequences}
\alias{sequences}
\title{List of 3 nucleotide sequences to use in examples for the
\code{\link{bold_identify}} function}
\description{
List of 3 nucleotide sequences to use in examples for the
\code{\link{bold_identify}} function
}
\details{
Each sequence is a character string, of lengths 410, 600, and 696.
}
\keyword{data}
bold/man/bold_tax_id.Rd 0000644 0001762 0000144 00000003644 13134212756 014517 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_tax_id.R
\name{bold_tax_id}
\alias{bold_tax_id}
\title{Search BOLD for taxonomy data by BOLD ID.}
\usage{
bold_tax_id(id, dataTypes = "basic", includeTree = FALSE,
response = FALSE, ...)
}
\arguments{
\item{id}{(integer) One or more BOLD taxonomic identifiers. required.}
\item{dataTypes}{(character) Specifies the datatypes that will be
returned. 'all' returns all data. 'basic' returns basic taxon information.
'images' returns specimen images.}
\item{includeTree}{(logical) If TRUE (default: FALSE), returns a list
containing information for parent taxa as well as the specified taxon.}
\item{response}{(logical) Note that response is the object that returns
from the Curl call, useful for debugging, and getting detailed info on
the API call.}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main
purpose being curl debugging}
}
\description{
Search BOLD for taxonomy data by BOLD ID.
}
\examples{
\dontrun{
bold_tax_id(id=88899)
bold_tax_id(id=88899, includeTree=TRUE)
bold_tax_id(id=88899, includeTree=TRUE, dataTypes = "stats")
bold_tax_id(id=c(88899,125295))
## dataTypes parameter
bold_tax_id(id=88899, dataTypes = "basic")
bold_tax_id(id=88899, dataTypes = "stats")
bold_tax_id(id=88899, dataTypes = "images")
bold_tax_id(id=88899, dataTypes = "geo")
bold_tax_id(id=88899, dataTypes = "sequencinglabs")
bold_tax_id(id=88899, dataTypes = "depository")
bold_tax_id(id=c(88899,125295), dataTypes = "geo")
bold_tax_id(id=c(88899,125295), dataTypes = "images")
## Passing in NA
bold_tax_id(id = NA)
bold_tax_id(id = c(88899,125295,NA))
## get http response object only
bold_tax_id(id=88899, response=TRUE)
bold_tax_id(id=c(88899,125295), response=TRUE)
## curl debugging
bold_tax_id(id=88899, verbose = TRUE)
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy}
}
\seealso{
\code{bold_tax_name}
}
bold/man/bold-package.Rd 0000644 0001762 0000144 00000002634 13134212756 014556 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold-package.R
\docType{package}
\name{bold-package}
\alias{bold-package}
\alias{bold}
\title{bold: A programmatic interface to the Barcode of Life data.}
\description{
bold: A programmatic interface to the Barcode of Life data.
}
\section{About}{
This package gives you access to data from BOLD Systems
\url{http://www.boldsystems.org/} via their API
(\url{http://v4.boldsystems.org/index.php/api_home})
}
\section{Functions}{
\itemize{
\item \code{\link{bold_specimens}} - Search for specimen data.
\item \code{\link{bold_seq}} - Search for and retrieve sequences.
\item \code{\link{bold_seqspec}} - Get sequence and specimen data together.
\item \code{\link{bold_trace}} - Get trace files - saves to disk.
\item \code{\link{read_trace}} - Read trace files into R.
\item \code{\link{bold_tax_name}} - Get taxonomic names via input names.
\item \code{\link{bold_tax_id}} - Get taxonomic names via BOLD identifiers.
\item \code{\link{bold_identify}} - Search for match given a COI sequence.
}
Interestingly, they provide xml and tsv format data for the specimen data,
while they provide fasta data format for the sequence data. So for the
specimen data you can get back raw XML, or a data frame parsed from the
tsv data, while for sequence data you get back a list (b/c sequences are
quite long and would make a data frame unwieldy).
}
bold/man/bold_trace.Rd 0000644 0001762 0000144 00000005116 13134226557 014345 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_trace.R
\name{bold_trace}
\alias{bold_trace}
\alias{read_trace}
\title{Get BOLD trace files}
\usage{
bold_trace(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
dest = NULL, overwrite = TRUE, progress = TRUE, ...)
read_trace(x)
}
\arguments{
\item{taxon}{(character) Returns all records containing matching taxa. Taxa
includes the ranks of phylum, class, order, family, subfamily, genus,
and species.}
\item{ids}{(character) Returns all records containing matching IDs. IDs
include Sample IDs, Process IDs, Museum IDs and Field IDs.}
\item{bin}{(character) Returns all records contained in matching BINs. A
BIN is defined by a Barcode Index Number URI.}
\item{container}{(character) Returns all records contained in matching
projects or datasets. Containers include project codes and dataset codes}
\item{institutions}{(character) Returns all records stored in matching
institutions. Institutions are the Specimen Storing Site.}
\item{researchers}{(character) Returns all records containing matching
researcher names. Researchers include collectors and specimen identifiers.}
\item{geo}{(character) Returns all records collected in matching geographic
sites. Geographic sites includes countries and province/states.}
\item{marker}{(character) Returns all records containing matching
marker codes.}
\item{dest}{(character) A directory to write the files to}
\item{overwrite}{(logical) Overwrite existing directory and file?}
\item{progress}{(logical) Print progress or not. NOT AVAILABLE FOR NOW.
HOPEFULLY WILL RETURN SOON.}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}}
\item{x}{Object to print or read.}
}
\description{
Get BOLD trace files
}
\examples{
\dontrun{
# Use a specific destination directory
bold_trace(taxon='Bombus', geo='Alaska', dest="~/mytarfiles")
# Another example
# bold_trace(ids='ACRJP618-11', dest="~/mytarfiles")
# bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles")
# read file in
x <- bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles")
(res <- read_trace(x$ab1[2]))
# The progress dialog is pretty verbose, so quiet=TRUE is a nice touch,
# but not by default
# Beware, this one takes a while
# x <- bold_trace(taxon='Osmia', quiet=TRUE)
if (requireNamespace("sangerseqR", quietly = TRUE)) {
library("sangerseqR")
primarySeq(res)
secondarySeq(res)
head(traceMatrix(res))
}
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
}
bold/man/bold_specimens.Rd 0000644 0001762 0000144 00000005205 13134220504 015216 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_specimens.R
\name{bold_specimens}
\alias{bold_specimens}
\title{Search BOLD for specimens.}
\usage{
bold_specimens(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
institutions = NULL, researchers = NULL, geo = NULL, response = FALSE,
format = "tsv", ...)
}
\arguments{
\item{taxon}{(character) Returns all records containing matching taxa. Taxa
includes the ranks of phylum, class, order, family, subfamily, genus,
and species.}
\item{ids}{(character) Returns all records containing matching IDs. IDs
include Sample IDs, Process IDs, Museum IDs and Field IDs.}
\item{bin}{(character) Returns all records contained in matching BINs. A
BIN is defined by a Barcode Index Number URI.}
\item{container}{(character) Returns all records contained in matching
projects or datasets. Containers include project codes and dataset codes}
\item{institutions}{(character) Returns all records stored in matching
institutions. Institutions are the Specimen Storing Site.}
\item{researchers}{(character) Returns all records containing matching
researcher names. Researchers include collectors and specimen identifiers.}
\item{geo}{(character) Returns all records collected in matching geographic
sites. Geographic sites includes countries and province/states.}
\item{response}{(logical) If \code{TRUE}, return the HTTP response object
from the curl call instead of the parsed data. Useful for debugging and for
getting detailed info on the API call. Default: \code{FALSE}}
\item{format}{(character) One of xml or tsv (default). tsv format gives
back a data.frame object. xml gives back parsed XML as an \code{xml_document}
object. 'json' (JavaScript Object Notation) and 'dwc' (Darwin Core Archive)
are supported by the BOLD API in theory, but the JSON can be malformed, so
we don't support it here, and the DWC option actually returns TSV.}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main
purpose being curl debugging}
}
\description{
Search BOLD for specimens.
}
\examples{
\dontrun{
bold_specimens(taxon='Osmia')
bold_specimens(taxon='Osmia', format='xml')
bold_specimens(taxon='Osmia', response=TRUE)
res <- bold_specimens(taxon='Osmia', format='xml', response=TRUE)
res$url
res$status_code
res$response_headers
# More than 1 can be given for all search parameters
bold_specimens(taxon=c('Coelioxys','Osmia'))
## curl debugging
### These examples below take a long time, so you can set a timeout so that
### it stops by X sec
head(bold_specimens(taxon='Osmia', verbose = TRUE))
# head(bold_specimens(geo='Costa Rica', timeout_ms = 6))
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
}
bold/man/bold_seqspec.Rd 0000644 0001762 0000144 00000006500 13134220504 014672 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_seqspec.R
\name{bold_seqspec}
\alias{bold_seqspec}
\title{Get BOLD specimen + sequence data.}
\usage{
bold_seqspec(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
response = FALSE, format = "tsv", sepfasta = FALSE, ...)
}
\arguments{
\item{taxon}{(character) Returns all records containing matching taxa. Taxa
include the ranks of phylum, class, order, family, subfamily, genus,
and species.}
\item{ids}{(character) Returns all records containing matching IDs. IDs
include Sample IDs, Process IDs, Museum IDs and Field IDs.}
\item{bin}{(character) Returns all records contained in matching BINs. A
BIN is defined by a Barcode Index Number URI.}
\item{container}{(character) Returns all records contained in matching
projects or datasets. Containers include project codes and dataset codes.}
\item{institutions}{(character) Returns all records stored in matching
institutions. Institutions are the Specimen Storing Site.}
\item{researchers}{(character) Returns all records containing matching
researcher names. Researchers include collectors and specimen identifiers.}
\item{geo}{(character) Returns all records collected in matching geographic
sites. Geographic sites include countries and provinces/states.}
\item{marker}{(character) Returns all records containing matching marker
codes. See Details.}
\item{response}{(logical) If \code{TRUE}, return the HTTP response object
from the curl call instead of the parsed data. Useful for debugging and for
getting detailed info on the API call. Default: \code{FALSE}}
\item{format}{(character) One of xml or tsv (default). tsv format gives
back a data.frame object. xml gives back parsed XML as an
\code{xml_document} object.}
\item{sepfasta}{(logical) If \code{TRUE}, the fasta data is separated into
a list with names matching the processid's from the data frame.
Default: \code{FALSE}}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main
purpose being curl debugging}
}
\value{
Either a data.frame, parsed xml, a http response object, or a list
with length two (a data.frame w/o nucleotide data, and a list with
nucleotide data)
}
\description{
Get BOLD specimen + sequence data.
}
\section{Marker}{
Notes from BOLD on the \code{marker} param:
"All markers for a specimen matching the search string will be returned.
ie. A record with COI-5P and ITS will return sequence data for both
markers even if only COI-5P was specified."
You will likely end up with data with markers that you did not request -
just be sure to filter those out as needed.
}
\examples{
\dontrun{
bold_seqspec(taxon='Osmia')
bold_seqspec(taxon='Osmia', format='xml')
bold_seqspec(taxon='Osmia', response=TRUE)
res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE)
res$fasta[1:2]
res$fasta['GBAH0293-06']
# records that match a marker name
res <- bold_seqspec(taxon="Melanogrammus aeglefinus", marker="COI-5P")
# records that match a geographic locality
res <- bold_seqspec(taxon="Melanogrammus aeglefinus", geo="Canada")
## curl debugging
### You can do many things, including get verbose output on the curl call,
### and set a timeout
head(bold_seqspec(taxon='Osmia', verbose = TRUE))
## timeout
# head(bold_seqspec(taxon='Osmia', timeout_ms = 1))
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
}
bold/man/bold_stats.Rd 0000644 0001762 0000144 00000005150 13134224125 014370 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_stats.R
\name{bold_stats}
\alias{bold_stats}
\title{Get BOLD stats}
\usage{
bold_stats(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
institutions = NULL, researchers = NULL, geo = NULL,
dataType = "drill_down", response = FALSE, ...)
}
\arguments{
\item{taxon}{(character) Returns all records containing matching taxa. Taxa
include the ranks of phylum, class, order, family, subfamily, genus,
and species.}
\item{ids}{(character) Returns all records containing matching IDs. IDs
include Sample IDs, Process IDs, Museum IDs and Field IDs.}
\item{bin}{(character) Returns all records contained in matching BINs. A
BIN is defined by a Barcode Index Number URI.}
\item{container}{(character) Returns all records contained in matching
projects or datasets. Containers include project codes and dataset codes.}
\item{institutions}{(character) Returns all records stored in matching
institutions. Institutions are the Specimen Storing Site.}
\item{researchers}{(character) Returns all records containing matching
researcher names. Researchers include collectors and specimen identifiers.}
\item{geo}{(character) Returns all records collected in matching geographic
sites. Geographic sites include countries and provinces/states.}
\item{dataType}{(character) one of "overview" or "drill_down" (default).
"drill_down": a detailed summary of information which provides record
counts by [BINs, Country, Storing Institution, Species]. "overview":
the total counts of [BINs, Countries, Storing Institutions, Orders,
Families, Genus, Species]}
\item{response}{(logical) If \code{TRUE}, return the HTTP response object
from the curl call instead of the parsed data. Useful for debugging and for
getting detailed info on the API call. Default: \code{FALSE}}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main
purpose being curl debugging}
}
\description{
Get BOLD stats
}
\examples{
\dontrun{
x <- bold_stats(taxon='Osmia')
x$total_records
x$records_with_species_name
x$bins
x$countries
x$depositories
x$order
x$family
x$genus
x$species
# just get all counts
lapply(Filter(is.list, x), "[[", "count")
res <- bold_stats(taxon='Osmia', response=TRUE)
res$url
res$status_code
res$response_headers
# More than 1 can be given for all search parameters
bold_stats(taxon=c('Coelioxys','Osmia'))
## curl debugging
### These examples below take a long time, so you can set a timeout so that
### it stops by X sec
bold_stats(taxon='Osmia', verbose = TRUE)
# bold_stats(geo='Costa Rica', timeout_ms = 6)
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
}
bold/man/bold_identify.Rd 0000644 0001762 0000144 00000005220 13134220504 015040 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bold_identify.R
\name{bold_identify}
\alias{bold_identify}
\title{Search for matches to sequences against the BOLD COI database.}
\usage{
bold_identify(sequences, db = "COX1", response = FALSE, ...)
}
\arguments{
\item{sequences}{(character) One or more sequences to identify by matching
them against the chosen BOLD database. Required.}
\item{db}{(character) The database to match against, one of COX1,
COX1_SPECIES, COX1_SPECIES_PUBLIC, or COX1_L640bp. See the section on
\code{db} options below for more information.}
\item{response}{(logical) If \code{TRUE}, return the HTTP response object
from the curl call instead of the parsed data. Useful for debugging and for
getting detailed info on the API call. Default: \code{FALSE}}
\item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main
purpose being curl debugging}
}
\value{
A data.frame with details for each specimen matched. If the
request fails, returns \code{NULL}.
}
\description{
Search for matches to sequences against the BOLD COI database.
}
\section{db parameter options}{
\itemize{
\item COX1 Every COI barcode record on BOLD with a minimum sequence
length of 500bp (warning: unvalidated library and includes records without
species level identification). This includes many species represented by
only one or two specimens as well as all species with interim taxonomy. This
search only returns a list of the nearest matches and does not provide a
probability of placement to a taxon.
\item COX1_SPECIES Every COI barcode record with a species level
identification and a minimum sequence length of 500bp. This includes
many species represented by only one or two specimens as well as all
species with interim taxonomy.
\item COX1_SPECIES_PUBLIC All published COI records from BOLD and GenBank
with a minimum sequence length of 500bp. This library is a collection of
records from the published projects section of BOLD.
\item COX1_L640bp Subset of the Species library with a minimum sequence
length of 640bp and containing both public and private records. This library
is intended for short sequence identification as it provides maximum overlap
with short reads from the barcode region of COI.
}
}
\section{Named outputs}{
To maintain names on the output list of data, make sure to pass in a
named list to the \code{sequences} parameter. You can, for example,
take a list of sequences and use \code{\link{setNames}} to set names.
}
\examples{
\dontrun{
seq <- sequences$seq1
res <- bold_identify(sequences=seq)
head(res[[1]])
head(bold_identify(sequences=seq, db='COX1_SPECIES')[[1]])
}
}
\references{
\url{http://v4.boldsystems.org/index.php/resources/api?type=idengine}
}
\seealso{
\code{\link{bold_identify_parents}}
}
bold/LICENSE 0000644 0001762 0000144 00000000057 13034000342 012166 0 ustar ligges users YEAR: 2017
COPYRIGHT HOLDER: Scott Chamberlain