bold/0000755000176200001440000000000013134504041011165 5ustar liggesusersbold/inst/0000755000176200001440000000000013134420302012137 5ustar liggesusersbold/inst/doc/0000755000176200001440000000000013134420302012704 5ustar liggesusersbold/inst/doc/bold_vignette.html0000644000176200001440000010676413134420302016435 0ustar liggesusers bold info

bold is an R package to connect to BOLD Systems via their API. Functions in bold let you search for sequence data, specimen data, sequence + specimen data, and download raw trace files.

bold info

Using bold

Install

Install bold from CRAN

install.packages("bold")

Or install the development version from GitHub

devtools::install_github("ropensci/bold")

Load the package

library("bold")

Search for taxonomic names via names

bold_tax_name looks up taxonomic records in BOLD by taxon name.

bold_tax_name(name = 'Diplura')
#>     input  taxid   taxon tax_rank tax_division parentid       parentname
#> 1 Diplura 591238 Diplura    order      Animals       82          Insecta
#> 2 Diplura 603673 Diplura    genus     Protists    53974 Scytosiphonaceae
#>   taxonrep
#> 1  Diplura
#> 2     <NA>
bold_tax_name(name = c('Diplura', 'Osmia'))
#>     input  taxid   taxon tax_rank tax_division parentid       parentname
#> 1 Diplura 591238 Diplura    order      Animals       82          Insecta
#> 2 Diplura 603673 Diplura    genus     Protists    53974 Scytosiphonaceae
#> 3   Osmia   4940   Osmia    genus      Animals     4962     Megachilinae
#>   taxonrep
#> 1  Diplura
#> 2     <NA>
#> 3    Osmia

Search for taxonomic names via BOLD identifiers

bold_tax_id searches for names with BOLD identifiers.

bold_tax_id(id = 88899)
#>   input taxid   taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus    genus      Animals    88898  Momotidae
bold_tax_id(id = c(88899, 125295))
#>    input  taxid      taxon tax_rank tax_division parentid parentname
#> 1  88899  88899    Momotus    genus      Animals    88898  Momotidae
#> 2 125295 125295 Helianthus    genus       Plants   100962 Asteraceae

Search for sequence data only

The BOLD sequence API gives back sequence data, with a bit of metadata.

The default is to get a list back

bold_seq(taxon = 'Coelioxys')[1:2]
#> [[1]]
#> [[1]]$id
#> [1] "FBAPB491-09"
#>
#> [[1]]$name
#> [1] "Coelioxys conica"
#>
#> [[1]]$gene
#> [1] "FBAPB491-09"
#>
#> [[1]]$sequence
#> [1] "---------------------ACCTCTTTAAGAATAATTATTCGTATAGAAATAAGAATTCCAGGATCTTGAATTAATAATGATCAAATTTATAACTCCTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATACCTTTTCTTATTGGAGGATTTGGAAATTGATTAGTACCTTTAATATTAGGATCACCAGATATAGCTTTCCCACGAATAAATAATATTAGATTTTGATTATTACCTCCTTCTTTATTAATATTATTATTAAGTAATTTAATAAATCCCAGACCAGGAACAGGCTGAACAGTTTATCCTCCTTTATCTTTATACACATACCACCCTTCTCCCTCAGTTGATTTAGCAATTTTTTCACTACATCTATCAGGAATCTCTTCTATTATTGGATCTATAAATTTTATTGTTACAATTTTAATAATAAAAAACTTTTCAATAAATTATAATCAAATACCATTATTCCCATGATCTATTTTAATTACTACTATTTTATTATTATTATCACTACCTGTATTAGCTGGTGCTATTACTATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCTATAGGAGGAGGAGACCCAATTTTATACCAACATTTA"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "FBAPC351-10"
#>
#> [[2]]$name
#> [1] "Coelioxys afra"
#>
#> [[2]]$gene
#> [1] "FBAPC351-10"
#>
#> [[2]]$sequence
#> [1] "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGAGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCATTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"

You can optionally get back the httr response object

res <- bold_seq(taxon = 'Coelioxys', response = TRUE)
res$headers
#> $date
#> [1] "Tue, 15 Sep 2015 20:02:31 GMT"
#>
#> $server
#> [1] "Apache/2.2.15 (Red Hat)"
#>
#> $`x-powered-by`
#> [1] "PHP/5.3.15"
#>
#> $`content-disposition`
#> [1] "attachment; filename=fasta.fas"
#>
#> $connection
#> [1] "close"
#>
#> $`transfer-encoding`
#> [1] "chunked"
#>
#> $`content-type`
#> [1] "application/x-download"
#>
#> attr(,"class")
#> [1] "insensitive" "list"

You can do geographic searches

bold_seq(geo = "USA")
#> [[1]]
#> [[1]]$id
#> [1] "GBAN1777-08"
#>
#> [[1]]$name
#> [1] "Macrobdella decora"
#>
#> [[1]]$gene
#> [1] "GBAN1777-08"
#>
#> [[1]]$sequence
#> [1] "---------------------------------ATTGGAATCTTGTATTTCTTATTAGGTACATGATCTGCTATAGTAGGGACCTCTATA---AGAATAATTATTCGAATTGAATTAGCTCAACCTGGGTCGTTTTTAGGAAAT---GATCAAATTTACAATACTATTGTTACTGCTCATGGATTAATTATAATTTTTTTTATAGTAATACCTATTTTAATTGGAGGGTTTGGTAATTGATTAATTCCGCTAATA---ATTGGTTCTCCTGATATAGCTTTTCCACGTCTTAATAATTTAAGATTTTGATTACTTCCGCCATCTTTAACTATACTTTTTTGTTCATCTATAGTCGAAAATGGAGTAGGTACTGGATGGACTATTTACCCTCCTTTAGCAGATAACATTGCTCATTCTGGACCTTCTGTAGATATA---GCAATTTTTTCACTTCATTTAGCTGGTGCTTCTTCTATTTTAGGTTCATTAAATTTTATTACTACTGTAGTTAATATACGATGACCAGGGATATCTATAGAGCGAATTCCTTTATTTATTTGATCCGTAATTATTACTACTGTATTGCTATTATTATCTTTACCAGTATTAGCAGCT---GCTATTTCAATATTATTAACAGATCGTAACTTAAATACTAGATTTTTTGACCCAATAGGAGGAGGGGATCCTATTTTATTCCAACATTTATTTTGATTTTTTGGCCACCCTGAAGTTTATATTTTAATTTTACCAGGATTTGGAGCTATTTCTCATGTAGTAAGTCATAACTCT---AAAAAATTAGAACCGTTTGGATCATTAGGGATATTATATGCAATAATTGGAATTGCAATTTTAGGTTTTATTGTTTGAGCACATCATATATTTACAGTAGGTCTTGATGTAGATACACGAGCTTATTTTACAGCAGCTACAATAGTTATTGCTGTTCCTACAGGAATTAAAGTATTTAGGTGATTG---GCAACT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "GBAN1780-08"
#>
#> [[2]]$name
#> [1] "Haemopis terrestris"
#>
#> [[2]]$gene
#> [1] "GBAN1780-08"
#>
#> [[2]]$sequence
#> [1] "---------------------------------ATTGGAACWTTWTATTTTATTTTNGGNGCTTGATCTGCTATATTNGGGATCTCAATA---AGGAATATTATTCGAATTGAGCCATCTCAACCTGGGAGATTATTAGGAAAT---GATCAATTATATAATTCATTAGTAACAGCTCATGGATTAATTATAATTTTCTTTATGGTTATGCCTATTTTGATTGGTGGGTTTGGTAATTGATTACTACCTTTAATA---ATTGGAGCCCCTGATATAGCTTTTCCTCGATTAAATAATTTAAGTTTTTGATTATTACCACCTTCATTAATTATATTGTTAAGATCCTCTATTATTGAAAGAGGGGTAGGTACAGGTTGAACCTTATATCCTCCTTTAGCAGATAGATTATTTCATTCAGGTCCATCGGTAGATATA---GCTATTTTTTCATTACATATAGCTGGAGCATCATCTATTTTAGGCTCATTAAACTTTATTTCTACAATTATTAATATACGAATTAAAGGTATAAGATCTGATCGAGTACCTTTATTTGTATGATCAGTTGTTATTACAACAGTTCTGTTATTATTGTCTTTACCTGTTTTAGCTGCA---GCTATTACTATATTATTAACAGATCGTAATTTAAATACTACTTTTTTTGATCCTATAGGAGGTGGAGATCCAGTATTGTTTCAACACTTATTTTGATTTTTTGGTCATCCAGAAGTATATATTTTGATTTTACCAGGATTTGGAGCAATTTCTCATATTATTACAAATAATTCT---AAAAAATTGGAACCTTTTGGATCTCTTGGTATAATTTATGCTATAATTGGAATTGCAGTTTTAGGGTTTATTGTATGAGCCCATCATATATTTACTGTAGGATTAGATGTTGATACTCGAGCTTATTTTACAGCAGCTACTATAGTTATTGCTGTTCCTACTGGTATTAAAGTTTTTAGGTGATTA---GCAACA"
#>
#>
#> [[3]]
#> [[3]]$id
#> [1] "GBNM0293-06"
#>
#> [[3]]$name
#> [1] "Steinernema carpocapsae"
#>
#> [[3]]$gene
#> [1] "GBNM0293-06"
#>
#> [[3]]$sequence
#> [1] "---------------------------------------------------------------------------------ACAAGATTATCTCTTATTATTCGTTTAGAGTTGGCTCAACCTGGTCTTCTTTTGGGTAAT---GGTCAATTATATAATTCTATTATTACTGCTCATGCTATTCTTATAATTTTTTTCATAGTTATACCTAGAATAATTGGTGGTTTTGGTAATTGAATATTACCTTTAATATTGGGGGCTCCTGATATAAGTTTTCCACGTTTGAATAATTTAAGTTTTTGATTGCTACCAACTGCTATATTTTTGATTTTAGATTCTTGTTTTGTTGACACTGGTTGTGGTACTAGTTGAACTGTTTATCCTCCTTTGAGG---ACTTTAGGTCACCCTGGYAGAAGTGTAGATTTAGCTATTTTTAGTCTTCATTGTGCAGGAATTAGCTCAATTTTAGGGGCTATTAATTTTATATGTACTACAAAAAATCTTCGTAGTAGTTCTATTTCTTTGGAACATATAAGACTTTTTGTTTGGGCTGTTTTTGTTACTGTTTTTTTATTAGTTTTATCTTTACCTGTTTTAGCTGGTGCTATTACTATGCTTTTAACAGACCGTAATTTAAATACTTCTTTTTTT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
#>
#>
#> [[4]]
#> [[4]]$id
#> [1] "NEONV108-11"
#>
#> [[4]]$name
#> [1] "Aedes thelcter"
#>
#> [[4]]$gene
#> [1] "NEONV108-11"
#>
#> [[4]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGATCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGATCCAATTGGAGGAGGAGACCCAATTTTATACCAACATTTATTT"
#>
#>
#> [[5]]
#> [[5]]$id
#> [1] "NEONV109-11"
#>
#> [[5]]$name
#> [1] "Aedes thelcter"
#>
#> [[5]]$gene
#> [1] "NEONV109-11"
#>
#> [[5]]$sequence
#> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGGTCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGACCCAATTGGAGGGGGAGACCCAATTTTATACCAACATTTATTT"

And you can search by researcher name

bold_seq(researchers = 'Thibaud Decaens')[[1]]
#> $id
#> [1] "BGABA657-14"
#>
#> $name
#> [1] "Coleoptera"
#>
#> $gene
#> [1] "BGABA657-14"
#>
#> $sequence
#> [1] "ACACTCTATTTCATTTTCGGAGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATACTAATTCGATCTGAATTGGGAAACCCCGGCTCATTGATTGGGGATGATCAAATTTATAATGTTATTGTAACAGCCCATGCATTCATTATAATTTTTTTTATAGTAATACCGATCATAATAGGAGGTTTTGGAAATTGATTAGTCCCGCTAATATTAGGTGCCCCAGATATAGCATTTCCTCGAATAAATAATATAAGATTTTGACTTCTTCCGCCTTCATTAACTTTACTTATTATAAGAAGAATTGTAGAAAACGGGGCGGGAACAGGATGAACAGTTTACCCACCCCTCTCTTCTAACATTGCTCATAGAGGAGCCTCTGTAGATCTTGCAATTTTTAGATTACATTTAGCCGGTGTATCATCAATTTTAGGTGCAGTTAATTTTATTACAACTATTATTAATATACGACCTAAAGGAATAACATTTGATCGCATACCTTTATTTGTATGAGCTGTAGCTTTAACTGCATTACTTTTATTATTATCTTTACCAGTATTAGCAGGTGCAATTACAATACTTTTAACTGATCGA---------------------------------------"

by record IDs (for example, process IDs)

bold_seq(ids = c('ACRJP618-11', 'ACRJP619-11'))
#> [[1]]
#> [[1]]$id
#> [1] "ACRJP618-11"
#>
#> [[1]]$name
#> [1] "Lepidoptera"
#>
#> [[1]]$gene
#> [1] "ACRJP618-11"
#>
#> [[1]]$sequence
#> [1] "------------------------TTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"
#>
#>
#> [[2]]
#> [[2]]$id
#> [1] "ACRJP619-11"
#>
#> [[2]]$name
#> [1] "Lepidoptera"
#>
#> [[2]]$gene
#> [1] "ACRJP619-11"
#>
#> [[2]]$sequence
#> [1] "AACTTTATATTTTATTTTTGGTATTTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT"

by container (containers include project codes and dataset codes)

bold_seq(container = 'ACRJP')[[1]]
#> $id
#> [1] "ACRJP003-09"
#>
#> $name
#> [1] "Lepidoptera"
#>
#> $gene
#> [1] "ACRJP003-09"
#>
#> $sequence
#> [1] "AACATTATATTTTATTTTTGGGATCTGATCTGGAATAGTAGGGACATCTTTAAGTATACTAATTCGAATAGAACTAGGAAATCCTGGATGTTTAATTGGGGATGATCAAATTTATAATACTATTGTTACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCCATTATAATTGGAGGTTTTGGCAATTGACTTGTACCATTAATATTAGGAGCCCCTGATATAGCATTTCCCCGAATAAATAATATAAGATTTTGACTTCTTCCCCCCTCATTAATTTTATTAATTTCAAGAAGAATTGTTGAAAATGGAGCAGGAACAGGATGAACAGTCTATCCTCCATTATCTTCTAATATTGCGCATAGAGGATCCTCTGTTGATTTAGCTATTTTCTCACTTCATTTAGCAGGAATTTCTTCTATTTTAGGAGCAATTAATTTTATTACAACTATTATTAATATACGAATAAATAATTTACTTTTTGACCAAATACCTCTATTTGTTTGAGCAGTAGGTATTACAGCTGTTCTTCTTTTATTATCATTACCAGTATTAGCAGGAGCAATTACCATACTATTAACAGATCGTAATTTAAATACTTCTTTCTTTGATCCTGCTGGAGGAGGAGATCCAATTTTATACCAACATTTATTT"

by bin (a bin is a Barcode Index Number)

bold_seq(bin = 'BOLD:AAA5125')[[1]]
#> $id
#> [1] "BLPAB406-06"
#>
#> $name
#> [1] "Eacles ormondei"
#>
#> $gene
#> [1] "BLPAB406-06"
#>
#> $sequence
#> [1] "AACTTTATATTTTATTTTTGGAATTTGAGCAGGTATAGTAGGAACTTCTTTAAGATTACTAATTCGAGCAGAATTAGGTACCCCCGGATCTTTAATTGGAGATGACCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGATTAGTACCCCTAATACTAGGAGCTCCTGATATAGCTTTCCCCCGAATAAATAATATAAGATTTTGACTATTACCCCCATCTTTAACTCTTTTAATTTCTAGAAGAATTGTCGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCCCTTTCATCTAATATTGCTCATGGAGGCTCTTCTGTTGATTTAGCTATTTTTTCCCTTCATCTAGCTGGAATCTCATCAATTTTAGGAGCTATTAATTTTATCACAACAATCATTAATATACGACTAAATAATATAATATTTGACCAAATACCTTTATTTGTATGAGCTGTTGGTATTACAGCATTTCTTTTATTGTTATCTTTACCTGTACTAGCTGGAGCTATTACTATACTTTTAACAGATCGAAACTTAAATACATCATTTTTTGACCCAGCAGGAGGAGGAGATCCTATTCTCTATCAACATTTATTT"

There are more ways to query; check out the docs for ?bold_seq.

Search for specimen data only

The BOLD specimen API doesn't give back sequences, only specimen data. By default you download data in tsv format, which is given back to you as a data.frame.

res <- bold_specimens(taxon = 'Osmia')
head(res[,1:8])
#>      processid         sampleid recordID       catalognum         fieldnum
#> 1  ASGCB261-13   BIOUG07489-F10  3955538                    BIOUG07489-F10
#> 2 BCHYM1499-13 BC ZSM HYM 19359  4005348 BC ZSM HYM 19359 BC ZSM HYM 19359
#> 3  BCHYM412-13 BC ZSM HYM 18272  3896353 BC ZSM HYM 18272 BC ZSM HYM 18272
#> 4  BCHYM413-13 BC ZSM HYM 18273  3896354 BC ZSM HYM 18273 BC ZSM HYM 18273
#> 5  FBAPB706-09 BC ZSM HYM 02181  1289067 BC ZSM HYM 02181 BC ZSM HYM 02181
#> 6  FBAPB730-09 BC ZSM HYM 02205  1289091 BC ZSM HYM 02205 BC ZSM HYM 02205
#>                         institution_storing      bin_uri phylum_taxID
#> 1         Biodiversity Institute of Ontario BOLD:AAB8874           20
#> 2 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAD6282           20
#> 3 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416           20
#> 4 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416           20
#> 5 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAE4126           20
#> 6 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAK5820           20

You can optionally get back the data in XML format

bold_specimens(taxon = 'Osmia', format = 'xml')
<?xml version="1.0" encoding="UTF-8"?>
<bold_records  xsi:noNamespaceSchemaLocation="http://www.boldsystems.org/schemas/BOLDPublic_record.xsd"  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <record>
    <record_id>1470124</record_id>
    <processid>BOM1525-10</processid>
    <bin_uri>BOLD:AAN3337</bin_uri>
    <specimen_identifiers>
      <sampleid>DHB 1011</sampleid>
      <catalognum>DHB 1011</catalognum>
      <fieldnum>DHB1011</fieldnum>
      <institution_storing>Marjorie Barrick Museum</institution_storing>
    </specimen_identifiers>
    <taxonomy>

You can choose to get the httr response object back if you'd rather work with the raw data returned from the BOLD API.

res <- bold_specimens(taxon = 'Osmia', format = 'xml', response = TRUE)
res$url
#> [1] "http://v4.boldsystems.org/index.php/API_Public/specimen?taxon=Osmia&format=xml"
res$status_code
#> [1] 200
res$headers
#> NULL

Search for specimen plus sequence data

The specimen/sequence combined API gives back specimen and sequence data. Like the specimen API, this one returns tsv format data by default, which is given back to you as a data.frame. Here, we're setting sepfasta=TRUE so that the sequence data is returned as a separate list and removed from the data.frame, which keeps the data.frame more manageable.

res <- bold_seqspec(taxon = 'Osmia', sepfasta = TRUE)
res$fasta[1:2]
#> $`ASGCB261-13`
#> [1] "AATTTTATATATAATTTTTGCTATATGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCTGGTTCATGAATTTCAAATGATCAAACTTATAATTCTTTAGTTACTGCTCATGCTTTTTTAATAATTTTTTTCTTAGTTATACCATTCTTAATTGGGGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATACTTTTATTATTAAGAAATTTTATAAATCCTAGTCCAGGAACTGGATGAACTGTTTATCCACCTTTATCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAACATATTCAATTACCTTTATTTCCTTGATCTGTCTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCAGGTGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCTACAGGAGGAGGAGATCCTATTCTTTATCAACATTTATTT"
#>
#> $`BCHYM1499-13`
#> [1] "AATTCTTTACATAATTTTTGCTTTATGATCTGGAATAATTGGGTCAGCAATAAGAATTATTATTCGAATAGAATTAAGTATCCCAGGTTCATGAATTACTAATGATCAAATTTATAATTCTTTAGTAACTGCACATGCTTTTTTAATAATTTTTTTTCTTGTGATACCATTTTTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCTTTCCCACGAATAAACAATATTAGATTTTGATTATTACCGCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCCCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTGCCTTTATTTCCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTGTTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCTACAGGAGGAGGAGATCCAATTCTTTATCAACATTTATTT"

Or you can index to a specific sequence like

res$fasta['GBAH0293-06']
#> $`GBAH0293-06`
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"

Get trace files

This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information.

bold_trace(taxon = 'Osmia', quiet = TRUE)
bold/inst/doc/bold_vignette.Rmd0000644000176200001440000004774713134226657016241 0ustar liggesusers `bold` is an R package to connect to [BOLD Systems](http://www.boldsystems.org/) via their API. Functions in `bold` let you search for sequence data, specimen data, sequence + specimen data, and download raw trace files. ### bold info + [BOLD home page](http://boldsystems.org/) + [BOLD API docs](http://v4.boldsystems.org/index.php/api_home) ### Using bold **Install** Install `bold` from CRAN ```r install.packages("bold") ``` Or install the development version from GitHub ```r devtools::install_github("ropensci/bold") ``` Load the package ```r library("bold") ``` ### Search for taxonomic names via names `bold_tax_name` searches for names with names. ```r bold_tax_name(name = 'Diplura') #> input taxid taxon tax_rank tax_division parentid parentname #> 1 Diplura 591238 Diplura order Animals 82 Insecta #> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae #> taxonrep #> 1 Diplura #> 2 ``` ```r bold_tax_name(name = c('Diplura', 'Osmia')) #> input taxid taxon tax_rank tax_division parentid parentname #> 1 Diplura 591238 Diplura order Animals 82 Insecta #> 2 Diplura 603673 Diplura genus Protists 53974 Scytosiphonaceae #> 3 Osmia 4940 Osmia genus Animals 4962 Megachilinae #> taxonrep #> 1 Diplura #> 2 #> 3 Osmia ``` ### Search for taxonomic names via BOLD identifiers `bold_tax_id` searches for names with BOLD identifiers. ```r bold_tax_id(id = 88899) #> input taxid taxon tax_rank tax_division parentid parentname #> 1 88899 88899 Momotus genus Animals 88898 Momotidae ``` ```r bold_tax_id(id = c(88899, 125295)) #> input taxid taxon tax_rank tax_division parentid parentname #> 1 88899 88899 Momotus genus Animals 88898 Momotidae #> 2 125295 125295 Helianthus genus Plants 100962 Asteraceae ``` ### Search for sequence data only The BOLD sequence API gives back sequence data, with a bit of metadata. 
The default is to get a list back ```r bold_seq(taxon = 'Coelioxys')[1:2] #> [[1]] #> [[1]]$id #> [1] "FBAPB491-09" #> #> [[1]]$name #> [1] "Coelioxys conica" #> #> [[1]]$gene #> [1] "FBAPB491-09" #> #> [[1]]$sequence #> [1] "---------------------ACCTCTTTAAGAATAATTATTCGTATAGAAATAAGAATTCCAGGATCTTGAATTAATAATGATCAAATTTATAACTCCTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATACCTTTTCTTATTGGAGGATTTGGAAATTGATTAGTACCTTTAATATTAGGATCACCAGATATAGCTTTCCCACGAATAAATAATATTAGATTTTGATTATTACCTCCTTCTTTATTAATATTATTATTAAGTAATTTAATAAATCCCAGACCAGGAACAGGCTGAACAGTTTATCCTCCTTTATCTTTATACACATACCACCCTTCTCCCTCAGTTGATTTAGCAATTTTTTCACTACATCTATCAGGAATCTCTTCTATTATTGGATCTATAAATTTTATTGTTACAATTTTAATAATAAAAAACTTTTCAATAAATTATAATCAAATACCATTATTCCCATGATCTATTTTAATTACTACTATTTTATTATTATTATCACTACCTGTATTAGCTGGTGCTATTACTATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCTATAGGAGGAGGAGACCCAATTTTATACCAACATTTA" #> #> #> [[2]] #> [[2]]$id #> [1] "FBAPC351-10" #> #> [[2]]$name #> [1] "Coelioxys afra" #> #> [[2]]$gene #> [1] "FBAPC351-10" #> #> [[2]]$sequence #> [1] "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGAGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCATTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT" ``` You can optionally get back the `httr` response object ```r res <- bold_seq(taxon = 'Coelioxys', response = TRUE) res$headers #> $date #> [1] "Tue, 15 Sep 2015 20:02:31 GMT" #> #> $server #> [1] "Apache/2.2.15 (Red Hat)" #> #> $`x-powered-by` #> [1] "PHP/5.3.15" #> #> $`content-disposition` #> [1] "attachment; 
filename=fasta.fas" #> #> $connection #> [1] "close" #> #> $`transfer-encoding` #> [1] "chunked" #> #> $`content-type` #> [1] "application/x-download" #> #> attr(,"class") #> [1] "insensitive" "list" ``` You can do geographic searches ```r bold_seq(geo = "USA") #> [[1]] #> [[1]]$id #> [1] "GBAN1777-08" #> #> [[1]]$name #> [1] "Macrobdella decora" #> #> [[1]]$gene #> [1] "GBAN1777-08" #> #> [[1]]$sequence #> [1] "---------------------------------ATTGGAATCTTGTATTTCTTATTAGGTACATGATCTGCTATAGTAGGGACCTCTATA---AGAATAATTATTCGAATTGAATTAGCTCAACCTGGGTCGTTTTTAGGAAAT---GATCAAATTTACAATACTATTGTTACTGCTCATGGATTAATTATAATTTTTTTTATAGTAATACCTATTTTAATTGGAGGGTTTGGTAATTGATTAATTCCGCTAATA---ATTGGTTCTCCTGATATAGCTTTTCCACGTCTTAATAATTTAAGATTTTGATTACTTCCGCCATCTTTAACTATACTTTTTTGTTCATCTATAGTCGAAAATGGAGTAGGTACTGGATGGACTATTTACCCTCCTTTAGCAGATAACATTGCTCATTCTGGACCTTCTGTAGATATA---GCAATTTTTTCACTTCATTTAGCTGGTGCTTCTTCTATTTTAGGTTCATTAAATTTTATTACTACTGTAGTTAATATACGATGACCAGGGATATCTATAGAGCGAATTCCTTTATTTATTTGATCCGTAATTATTACTACTGTATTGCTATTATTATCTTTACCAGTATTAGCAGCT---GCTATTTCAATATTATTAACAGATCGTAACTTAAATACTAGATTTTTTGACCCAATAGGAGGAGGGGATCCTATTTTATTCCAACATTTATTTTGATTTTTTGGCCACCCTGAAGTTTATATTTTAATTTTACCAGGATTTGGAGCTATTTCTCATGTAGTAAGTCATAACTCT---AAAAAATTAGAACCGTTTGGATCATTAGGGATATTATATGCAATAATTGGAATTGCAATTTTAGGTTTTATTGTTTGAGCACATCATATATTTACAGTAGGTCTTGATGTAGATACACGAGCTTATTTTACAGCAGCTACAATAGTTATTGCTGTTCCTACAGGAATTAAAGTATTTAGGTGATTG---GCAACT" #> #> #> [[2]] #> [[2]]$id #> [1] "GBAN1780-08" #> #> [[2]]$name #> [1] "Haemopis terrestris" #> #> [[2]]$gene #> [1] "GBAN1780-08" #> #> [[2]]$sequence #> [1] 
"---------------------------------ATTGGAACWTTWTATTTTATTTTNGGNGCTTGATCTGCTATATTNGGGATCTCAATA---AGGAATATTATTCGAATTGAGCCATCTCAACCTGGGAGATTATTAGGAAAT---GATCAATTATATAATTCATTAGTAACAGCTCATGGATTAATTATAATTTTCTTTATGGTTATGCCTATTTTGATTGGTGGGTTTGGTAATTGATTACTACCTTTAATA---ATTGGAGCCCCTGATATAGCTTTTCCTCGATTAAATAATTTAAGTTTTTGATTATTACCACCTTCATTAATTATATTGTTAAGATCCTCTATTATTGAAAGAGGGGTAGGTACAGGTTGAACCTTATATCCTCCTTTAGCAGATAGATTATTTCATTCAGGTCCATCGGTAGATATA---GCTATTTTTTCATTACATATAGCTGGAGCATCATCTATTTTAGGCTCATTAAACTTTATTTCTACAATTATTAATATACGAATTAAAGGTATAAGATCTGATCGAGTACCTTTATTTGTATGATCAGTTGTTATTACAACAGTTCTGTTATTATTGTCTTTACCTGTTTTAGCTGCA---GCTATTACTATATTATTAACAGATCGTAATTTAAATACTACTTTTTTTGATCCTATAGGAGGTGGAGATCCAGTATTGTTTCAACACTTATTTTGATTTTTTGGTCATCCAGAAGTATATATTTTGATTTTACCAGGATTTGGAGCAATTTCTCATATTATTACAAATAATTCT---AAAAAATTGGAACCTTTTGGATCTCTTGGTATAATTTATGCTATAATTGGAATTGCAGTTTTAGGGTTTATTGTATGAGCCCATCATATATTTACTGTAGGATTAGATGTTGATACTCGAGCTTATTTTACAGCAGCTACTATAGTTATTGCTGTTCCTACTGGTATTAAAGTTTTTAGGTGATTA---GCAACA" #> #> #> [[3]] #> [[3]]$id #> [1] "GBNM0293-06" #> #> [[3]]$name #> [1] "Steinernema carpocapsae" #> #> [[3]]$gene #> [1] "GBNM0293-06" #> #> [[3]]$sequence #> [1] 
"---------------------------------------------------------------------------------ACAAGATTATCTCTTATTATTCGTTTAGAGTTGGCTCAACCTGGTCTTCTTTTGGGTAAT---GGTCAATTATATAATTCTATTATTACTGCTCATGCTATTCTTATAATTTTTTTCATAGTTATACCTAGAATAATTGGTGGTTTTGGTAATTGAATATTACCTTTAATATTGGGGGCTCCTGATATAAGTTTTCCACGTTTGAATAATTTAAGTTTTTGATTGCTACCAACTGCTATATTTTTGATTTTAGATTCTTGTTTTGTTGACACTGGTTGTGGTACTAGTTGAACTGTTTATCCTCCTTTGAGG---ACTTTAGGTCACCCTGGYAGAAGTGTAGATTTAGCTATTTTTAGTCTTCATTGTGCAGGAATTAGCTCAATTTTAGGGGCTATTAATTTTATATGTACTACAAAAAATCTTCGTAGTAGTTCTATTTCTTTGGAACATATAAGACTTTTTGTTTGGGCTGTTTTTGTTACTGTTTTTTTATTAGTTTTATCTTTACCTGTTTTAGCTGGTGCTATTACTATGCTTTTAACAGACCGTAATTTAAATACTTCTTTTTTT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------" #> #> #> [[4]] #> [[4]]$id #> [1] "NEONV108-11" #> #> [[4]]$name #> [1] "Aedes thelcter" #> #> [[4]]$gene #> [1] "NEONV108-11" #> #> [[4]]$sequence #> [1] 
"AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGATCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGATCCAATTGGAGGAGGAGACCCAATTTTATACCAACATTTATTT" #> #> #> [[5]] #> [[5]]$id #> [1] "NEONV109-11" #> #> [[5]]$name #> [1] "Aedes thelcter" #> #> [[5]]$gene #> [1] "NEONV109-11" #> #> [[5]]$sequence #> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGGTCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGACCCAATTGGAGGGGGAGACCCAATTTTATACCAACATTTATTT" ``` And you can search by researcher name ```r bold_seq(researchers = 'Thibaud Decaens')[[1]] #> $id #> [1] "BGABA657-14" #> #> $name #> [1] "Coleoptera" #> #> $gene #> [1] "BGABA657-14" #> #> $sequence #> [1] 
"ACACTCTATTTCATTTTCGGAGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATACTAATTCGATCTGAATTGGGAAACCCCGGCTCATTGATTGGGGATGATCAAATTTATAATGTTATTGTAACAGCCCATGCATTCATTATAATTTTTTTTATAGTAATACCGATCATAATAGGAGGTTTTGGAAATTGATTAGTCCCGCTAATATTAGGTGCCCCAGATATAGCATTTCCTCGAATAAATAATATAAGATTTTGACTTCTTCCGCCTTCATTAACTTTACTTATTATAAGAAGAATTGTAGAAAACGGGGCGGGAACAGGATGAACAGTTTACCCACCCCTCTCTTCTAACATTGCTCATAGAGGAGCCTCTGTAGATCTTGCAATTTTTAGATTACATTTAGCCGGTGTATCATCAATTTTAGGTGCAGTTAATTTTATTACAACTATTATTAATATACGACCTAAAGGAATAACATTTGATCGCATACCTTTATTTGTATGAGCTGTAGCTTTAACTGCATTACTTTTATTATTATCTTTACCAGTATTAGCAGGTGCAATTACAATACTTTTAACTGATCGA---------------------------------------" ``` by taxon IDs ```r bold_seq(ids = c('ACRJP618-11', 'ACRJP619-11')) #> [[1]] #> [[1]]$id #> [1] "ACRJP618-11" #> #> [[1]]$name #> [1] "Lepidoptera" #> #> [[1]]$gene #> [1] "ACRJP618-11" #> #> [[1]]$sequence #> [1] "------------------------TTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT" #> #> #> [[2]] #> [[2]]$id #> [1] "ACRJP619-11" #> #> [[2]]$name #> [1] "Lepidoptera" #> #> [[2]]$gene #> [1] "ACRJP619-11" #> #> [[2]]$sequence #> [1] 
"AACTTTATATTTTATTTTTGGTATTTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT" ``` by container (containers include project codes and dataset codes) ```r bold_seq(container = 'ACRJP')[[1]] #> $id #> [1] "ACRJP003-09" #> #> $name #> [1] "Lepidoptera" #> #> $gene #> [1] "ACRJP003-09" #> #> $sequence #> [1] "AACATTATATTTTATTTTTGGGATCTGATCTGGAATAGTAGGGACATCTTTAAGTATACTAATTCGAATAGAACTAGGAAATCCTGGATGTTTAATTGGGGATGATCAAATTTATAATACTATTGTTACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCCATTATAATTGGAGGTTTTGGCAATTGACTTGTACCATTAATATTAGGAGCCCCTGATATAGCATTTCCCCGAATAAATAATATAAGATTTTGACTTCTTCCCCCCTCATTAATTTTATTAATTTCAAGAAGAATTGTTGAAAATGGAGCAGGAACAGGATGAACAGTCTATCCTCCATTATCTTCTAATATTGCGCATAGAGGATCCTCTGTTGATTTAGCTATTTTCTCACTTCATTTAGCAGGAATTTCTTCTATTTTAGGAGCAATTAATTTTATTACAACTATTATTAATATACGAATAAATAATTTACTTTTTGACCAAATACCTCTATTTGTTTGAGCAGTAGGTATTACAGCTGTTCTTCTTTTATTATCATTACCAGTATTAGCAGGAGCAATTACCATACTATTAACAGATCGTAATTTAAATACTTCTTTCTTTGATCCTGCTGGAGGAGGAGATCCAATTTTATACCAACATTTATTT" ``` by bin (a bin is a _Barcode Index Number_) ```r bold_seq(bin = 'BOLD:AAA5125')[[1]] #> $id #> [1] "BLPAB406-06" #> #> $name #> [1] "Eacles ormondei" #> #> $gene #> [1] "BLPAB406-06" #> #> $sequence #> [1] 
"AACTTTATATTTTATTTTTGGAATTTGAGCAGGTATAGTAGGAACTTCTTTAAGATTACTAATTCGAGCAGAATTAGGTACCCCCGGATCTTTAATTGGAGATGACCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGATTAGTACCCCTAATACTAGGAGCTCCTGATATAGCTTTCCCCCGAATAAATAATATAAGATTTTGACTATTACCCCCATCTTTAACTCTTTTAATTTCTAGAAGAATTGTCGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCCCTTTCATCTAATATTGCTCATGGAGGCTCTTCTGTTGATTTAGCTATTTTTTCCCTTCATCTAGCTGGAATCTCATCAATTTTAGGAGCTATTAATTTTATCACAACAATCATTAATATACGACTAAATAATATAATATTTGACCAAATACCTTTATTTGTATGAGCTGTTGGTATTACAGCATTTCTTTTATTGTTATCTTTACCTGTACTAGCTGGAGCTATTACTATACTTTTAACAGATCGAAACTTAAATACATCATTTTTTGACCCAGCAGGAGGAGGAGATCCTATTCTCTATCAACATTTATTT" ``` And there are more ways to query, check out the docs for `?bold_seq`. ### Search for specimen data only The BOLD specimen API doesn't give back sequences, only specimen data. By default you download `tsv` format data, which is given back to you as a `data.frame` ```r res <- bold_specimens(taxon = 'Osmia') head(res[,1:8]) #> processid sampleid recordID catalognum fieldnum #> 1 ASGCB261-13 BIOUG07489-F10 3955538 BIOUG07489-F10 #> 2 BCHYM1499-13 BC ZSM HYM 19359 4005348 BC ZSM HYM 19359 BC ZSM HYM 19359 #> 3 BCHYM412-13 BC ZSM HYM 18272 3896353 BC ZSM HYM 18272 BC ZSM HYM 18272 #> 4 BCHYM413-13 BC ZSM HYM 18273 3896354 BC ZSM HYM 18273 BC ZSM HYM 18273 #> 5 FBAPB706-09 BC ZSM HYM 02181 1289067 BC ZSM HYM 02181 BC ZSM HYM 02181 #> 6 FBAPB730-09 BC ZSM HYM 02205 1289091 BC ZSM HYM 02205 BC ZSM HYM 02205 #> institution_storing bin_uri phylum_taxID #> 1 Biodiversity Institute of Ontario BOLD:AAB8874 20 #> 2 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAD6282 20 #> 3 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20 #> 4 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20 #> 5 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAE4126 20 #> 6 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAK5820 20 ``` You can optionally get back the data in `XML` format ```r bold_specimens(taxon = 'Osmia', format = 'xml') 
``` ```r 1470124 BOM1525-10 BOLD:AAN3337 DHB 1011 DHB 1011 DHB1011 Marjorie Barrick Museum ``` You can choose to get the `httr` response object back if you'd rather work with the raw data returned from the BOLD API. ```r res <- bold_specimens(taxon = 'Osmia', format = 'xml', response = TRUE) res$url #> [1] "http://v4.boldsystems.org/index.php/API_Public/specimen?taxon=Osmia&format=xml" res$status_code #> [1] 200 res$headers #> NULL ``` ### Search for specimen plus sequence data The specimen/sequence combined API gives back specimen and sequence data. Like the specimen API, this one gives by default `tsv` format data, which is given back to you as a `data.frame`. Here, we're setting `sepfasta=TRUE` so that the sequence data is given back as a list, and taken out of the `data.frame` returned so the `data.frame` is more manageable. ```r res <- bold_seqspec(taxon = 'Osmia', sepfasta = TRUE) res$fasta[1:2] #> $`ASGCB261-13` #> [1] "AATTTTATATATAATTTTTGCTATATGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCTGGTTCATGAATTTCAAATGATCAAACTTATAATTCTTTAGTTACTGCTCATGCTTTTTTAATAATTTTTTTCTTAGTTATACCATTCTTAATTGGGGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATACTTTTATTATTAAGAAATTTTATAAATCCTAGTCCAGGAACTGGATGAACTGTTTATCCACCTTTATCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAACATATTCAATTACCTTTATTTCCTTGATCTGTCTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCAGGTGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCTACAGGAGGAGGAGATCCTATTCTTTATCAACATTTATTT" #> #> $`BCHYM1499-13` #> [1] 
"AATTCTTTACATAATTTTTGCTTTATGATCTGGAATAATTGGGTCAGCAATAAGAATTATTATTCGAATAGAATTAAGTATCCCAGGTTCATGAATTACTAATGATCAAATTTATAATTCTTTAGTAACTGCACATGCTTTTTTAATAATTTTTTTTCTTGTGATACCATTTTTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCTTTCCCACGAATAAACAATATTAGATTTTGATTATTACCGCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCCCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTGCCTTTATTTCCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTGTTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCTACAGGAGGAGGAGATCCAATTCTTTATCAACATTTATTT" ``` Or you can index to a specific sequence like ```r res$fasta['GBAH0293-06'] #> $`GBAH0293-06` #> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------" ``` ### Get trace files This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information. ```r bold_trace(taxon = 'Osmia', quiet = TRUE) ``` bold/tests/0000755000176200001440000000000013134420302012324 5ustar liggesusersbold/tests/testthat/0000755000176200001440000000000013134420302014164 5ustar liggesusersbold/tests/testthat/test-bold_tax_id.R0000644000176200001440000000364712773532625017572 0ustar liggesuserscontext("bold_tax_id") test_that("bold_tax_id returns the correct classes", { skip_on_cran() aa <- bold_tax_id(88899) bb <- bold_tax_id(125295) expect_is(aa, "data.frame") expect_is(bb, "data.frame") expect_is(aa$input, "numeric") expect_is(aa$taxid, "integer") expect_is(aa$tax_rank, "character") }) test_that("bold_tax_id works with multiple ids passed in", { skip_on_cran() aa <- bold_tax_id(c(88899,125295)) expect_is(aa, "data.frame") expect_equal(NROW(aa), 2) }) test_that("bold_tax_id dataTypes param works as expected", { skip_on_cran() aa <- bold_tax_id(88899, dataTypes = "basic") bb <- bold_tax_id(88899, dataTypes = "stats") dd <- bold_tax_id(88899, dataTypes = "geo") ee <- bold_tax_id(88899, dataTypes = "sequencinglabs") ff <- bold_tax_id(321215, dataTypes = "stats") # no public marker sequences gg <- bold_tax_id(321215, dataTypes = "basic,stats") # no public marker sequences expect_is(aa, "data.frame") expect_is(bb, "data.frame") expect_is(dd, "data.frame") expect_is(ee, "data.frame") expect_is(ff, "data.frame") expect_is(gg, "data.frame") expect_equal(NROW(aa), 1) expect_equal(NROW(bb), 1) expect_equal(NROW(dd), 1) expect_equal(NROW(ee), 1) expect_equal(NROW(ff), 1) expect_equal(NROW(gg), 1) expect_named(dd, 
c('input','Brazil','Mexico','Panama','Guatemala','Peru','Bolivia','Ecuador')) expect_gt(NCOL(bb), NCOL(aa)) expect_gt(NCOL(ee), NCOL(aa)) expect_gt(NCOL(bb), NCOL(ee)) expect_gt(NCOL(ff), NCOL(aa)) expect_gt(NCOL(gg), NCOL(ff)) }) test_that("includeTree param works as expected", { skip_on_cran() aa <- bold_tax_id(id=88899, includeTree=FALSE) bb <- bold_tax_id(id=88899, includeTree=TRUE) expect_is(aa, "data.frame") expect_is(bb, "data.frame") expect_gt(NROW(bb), NROW(aa)) }) test_that("bold_tax_id fails well", { skip_on_cran() expect_error(bold_tax_id(), "argument \"id\" is missing, with no default") }) bold/tests/testthat/test-bold_tax_name.R0000644000176200001440000000145512676331243020104 0ustar liggesuserscontext("bold_tax_name") test_that("bold_tax_name returns the correct classes", { skip_on_cran() a <- bold_tax_name(name='Diplura') b <- bold_tax_name(name=c('Diplura','Osmia')) cc <- bold_tax_name(name=c("Apis","Puma concolor","Pinus concolor")) expect_is(a, "data.frame") expect_is(b, "data.frame") expect_is(cc, "data.frame") expect_is(a$input, "character") expect_is(a$taxid, "integer") }) test_that("bold_tax_name fails well", { skip_on_cran() expect_error(bold_tax_name(), "argument \"name\" is missing, with no default") }) test_that("fuzzy works", { skip_on_cran() aa <- bold_tax_name(name='Diplur', fuzzy=TRUE) aa_not <- bold_tax_name(name='Diplur', fuzzy=FALSE) expect_is(aa, "data.frame") expect_is(aa$input, "character") expect_gt(NROW(aa), NROW(aa_not)) }) bold/tests/testthat/test-bold_specimens.R0000644000176200001440000000173713134177202020272 0ustar liggesusers# tests for bold_specimens fxn in bold context("bold_specimens") test_that("bold_specimens returns the correct dimensions or values", { skip_on_cran() a <- bold_specimens(taxon='Osmia') b <- bold_specimens(taxon='Osmia', format='xml', response=TRUE) expect_equal(b$status_code, 200) expect_equal(b$response_headers$`content-type`, "application/x-download") expect_is(a, "data.frame") expect_is(b, 
"HttpResponse") expect_is(a$recordID, "integer") expect_is(a$processid, "character") expect_is(b$response_headers, "list") }) test_that("Throws warning on call that takes forever including timeout in callopts", { skip_on_cran() expect_error(bold_specimens(geo='Costa Rica', timeout_ms = 2), "Timeout was reached") }) test_that("bold_seq returns correct thing when parameters empty or not given", { skip_on_cran() expect_error(bold_specimens(taxon=''), "must provide a non-empty value") expect_error(bold_specimens(), "must provide a non-empty value") }) bold/tests/testthat/test-bold_seq.R0000644000176200001440000000150613121303572017063 0ustar liggesusers# tests for bold_seq fxn in bold context("bold_seq") test_that("bold_seq returns the correct dimensions/classes", { skip_on_cran() a <- bold_seq(taxon='Coelioxys') b <- bold_seq(bin='BOLD:AAA5125') c <- bold_seq(taxon='Coelioxys', response=TRUE) expect_equal(c$status_code, 200) expect_equal(c$response_headers$`content-type`, "application/x-download") expect_is(a, "list") expect_is(b, "list") expect_is(a[[1]], "list") expect_is(a[[1]]$id, "character") expect_is(a[[1]]$sequence, "character") expect_is(c, "HttpResponse") expect_is(c$response_headers, "list") }) test_that("bold_seq returns correct error when parameters empty or not given", { skip_on_cran() expect_error(bold_seq(taxon = ''), "must provide a non-empty value") expect_error(bold_seq(), "must provide a non-empty value") }) bold/tests/testthat/test-bold_seqspec.R0000644000176200001440000000165613121303643017743 0ustar liggesusers# tests for bold_seqspec fxn in bold context("bold_seqspec") test_that("bold_seqspec returns the correct dimensions or values", { skip_on_cran() a <- bold_seqspec(taxon='Osmia') b <- bold_seqspec(taxon='Osmia', response=TRUE) c <- bold_seqspec(taxon='Osmia', sepfasta=TRUE) expect_equal(b$status_code, 200) expect_equal(b$response_headers$`content-type`, "application/x-download") expect_is(a, "data.frame") expect_is(b, "HttpResponse") 
expect_is(c, "list") expect_is(c$data, "data.frame") expect_is(c$fasta, "list") expect_is(c$fasta[[1]], "character") expect_is(a$recordID, "integer") expect_is(a$directions, "character") expect_is(b$response_headers, "list") }) test_that("bold_seq returns correct error when parameters empty or not given", { skip_on_cran() expect_error(bold_seqspec(taxon=''), "must provide a non-empty value") expect_error(bold_seqspec(), "must provide a non-empty value") }) bold/tests/testthat/test-bold_identify.R0000644000176200001440000000151013121307503020077 0ustar liggesuserscontext("bold_identify") seq <- sequences$seq1 test_that("bold_identify works as expected", { skip_on_cran() aa <- bold_identify(seq) expect_is(aa, 'list') expect_is(aa[[1]], 'data.frame') expect_is(aa[[1]]$ID, 'character') }) test_that("bold_identify db param works as expected", { skip_on_cran() aa <- bold_identify(seq, db = 'COX1_SPECIES') expect_is(aa, 'list') expect_is(aa[[1]], 'data.frame') expect_is(aa[[1]]$ID, 'character') }) test_that("bold_identify response param works as expected", { skip_on_cran() aa <- bold_identify(seq, response = TRUE) expect_is(aa, "list") expect_is(aa[[1]], "HttpResponse") expect_equal(aa[[1]]$status_code, 200) }) test_that("bold_identify fails well", { skip_on_cran() expect_error(bold_identify(), "argument \"sequences\" is missing, with no default") }) bold/tests/test-all.R0000644000176200001440000000004512341431353014202 0ustar liggesuserslibrary(testthat) test_check('bold') bold/NAMESPACE0000644000176200001440000000147413134207626012423 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(bold_identify_parents,data.frame) S3method(bold_identify_parents,default) S3method(bold_identify_parents,list) S3method(print,boldtrace) export(bold_filter) export(bold_identify) export(bold_identify_parents) export(bold_seq) export(bold_seqspec) export(bold_specimens) export(bold_stats) export(bold_tax_id) export(bold_tax_name) export(bold_trace) export(read_trace) 
importFrom(crul,HttpClient) importFrom(crul,url_build) importFrom(jsonlite,fromJSON) importFrom(plyr,rbind.fill) importFrom(reshape,sort_df) importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(stringr,str_split) importFrom(xml2,as_list) importFrom(xml2,read_xml) importFrom(xml2,xml_find_all) importFrom(xml2,xml_find_first) importFrom(xml2,xml_name) importFrom(xml2,xml_text) bold/NEWS.md0000644000176200001440000001071113134225567012300 0ustar liggesusersbold 0.5.0 ========== ### NEW FEATURES * Now using BOLD's v4 API throughout the package. This was essentially just a change of the BASE URL for each request (#30) * Now using `crul` for HTTP requests. Only really affects users in that specifying curl options works slightly differently (#42) ### BUG FIXES * `marker` parameter in `bold_seqspec` was and maybe still is not working, in the sense that using the parameter doesn't always limit results to the marker you specify. Not really fixed - watch out for it, and filter after you get results back to get markers you want. (#25) * Fixed bug in `bold_identify_parents` - was failing when no match for a parent name. (#41) thx @VascoElbrecht * `tsv` results were erroring in `bold_specimens` and other fxns (#46) - fixed by switching to new BOLD v4 API (#30) ### MINOR IMPROVEMENTS * Namespace calls to base pkgs for `stats` and `utils` - replaced `is` with `inherits` (#39) bold 0.4.0 ========== ### NEW FEATURES * New function `bold_identify_parents()` to add taxonomic information to the output of `bold_identify()`. We take the taxon names from `bold_identify` output, and use `bold_tax_name` to get the taxonomic ID, passing it to `bold_tax_id` to get the parent names, then attach those to the input data. There are two options given what you put for the `wide` parameter. If `TRUE` you get data.frames of the same dimensions with parent rank name and ID as new columns (for each name going up the hierarchy) - while if `FALSE` you get a long data.frame.
thanks @dougwyu for inspiring this (#36) ### MINOR IMPROVEMENTS * replace `xml2::xml_find_one` with `xml2::xml_find_first` (#33) * Fix description of `db` options in `bold_identify` man file - COX1 and COX1_SPECIES were switched (#37) thanks for pointing that out @dougwyu ### BUG FIXES * Fix to `bold_tax_id` for when some elements returned from the BOLD API were empty/`NULL` (#32) thanks @fmichonneau !! bold 0.3.5 ========== ### MINOR IMPROVEMENTS * Added more tests to the test suite (#28) ### BUG FIXES * Fixed a bug in an internal data parser (#27) bold 0.3.4 ========== ### NEW FEATURES * Added a code of conduct ### MINOR IMPROVEMENTS * Switched to `xml2` from `XML` as the XML parser for this package (#26) * Fixes to `bold_trace()` to create dir and tar file when it doesn't already exist ### BUG FIXES * Fixed odd problem where sometimes resulting data from HTTP request was garbled on `content(x, "text")`, so now using `rawToChar(content(x))`, which works (#24) bold 0.3.0 ========== ### MINOR IMPROVEMENTS * Explicitly import non-base R functions (#22) * Better package level manual file bold 0.2.6 ========== ### MINOR IMPROVEMENTS * `sangerseqR` package now in Suggests for reading trace files, and is only used in `bold_trace()` function. * General code tidying, reduction of code duplication. * `bold_trace()` gains two new parameters: `overwrite` to choose whether to overwrite an existing file of the same name or not, `progress` to show a progress bar for downloading or not. * `bold_trace()` gains a print method to show a tidy summary of the trace file downloaded. ### BUG FIXES * Fixed similar bugs in `bold_tax_name()` (#17) and `bold_tax_id()` (#18) in which species that were missing from the BOLD database returned empty arrays but 200 status codes. Parsing those as failed attempts now. Also fixes problem in taxize in `bold_search()` that use these two functions. 
bold 0.2.0 ========== ### NEW FEATURES * Package gains two new functions for working with the BOLD taxonomy APIs: `bold_tax_name()` and `bold_tax_id()`, which search for taxonomic data from BOLD using either names or BOLD identifiers, respectively. (#11) * Two new packages in Imports: `jsonlite` and `reshape`. ### MINOR IMPROVEMENTS * Added new taxonomy API functions to the vignette (#14) * Added reference URLS to all function doc files to allow easy reference for the appropriate API docs. * `callopts` parameter changed to `...` throughout the package, so that passing on options to `httr::GET` is done via named parameters, e.g., `config=verbose()`. (#13) * Added examples of doing curl debugging throughout man pages. bold 0.1.2 ========== ### MINOR IMPROVEMENTS * Improved the vignette (#8) * Added small function to print helpful message when user inputs no parameters or zero length parameter values. ### BUG FIXES * Fixed some broken tests with the new `httr` (v0.4) (#9), and added a few more tests (#7) bold 0.1.0 ========== ### NEW FEATURES * released to CRAN bold/data/0000755000176200001440000000000012370735031012104 5ustar liggesusersbold/data/sequences.RData0000644000176200001440000000122512370735031015014 0ustar liggesusersUNA }pR$#aQ !%|Ue/Ѝۏ}uz;-˲_ݲ??zz~y;?=Ώ@~Uؽ{e}6^+w| )(2>jS։=fIҜУD&'eqG BP8Õ.`0ތ2elQA%~"N=T 8FLnpD<7Z"P!%ſI90bdJ3I䂢I9gJW L2@@g4%o?Ntm!Nkr>|ZcŐJKRf0=4xIxIi|Y+#/kO*CŇgJYE$3ya8/G;p,0j5Z7=͛Q$Kv ű 4h >]**4Y݆Yيbh 2Z $DAQ+憬6CA$|?QGLEObJ)rgre{gvM@e{Ԅul:G@ql %;W]H MP ǔ Y&tT<8}3%Z;y?{$=?l~C(7.Wbold/R/0000755000176200001440000000000013134206376011400 5ustar liggesusersbold/R/bold-package.R0000644000176200001440000000337513134212646014041 0ustar liggesusers#' bold: A programmatic interface to the Barcode of Life data. 
#' #' @section About: #' #' This package gives you access to data from BOLD System #' \url{http://www.boldsystems.org/} via their API #' (\url{http://v4.boldsystems.org/index.php/api_home}) #' #' @section Functions: #' #' \itemize{ #' \item \code{\link{bold_specimens}} - Search for specimen data. #' \item \code{\link{bold_seq}} - Search for and retrieve sequences. #' \item \code{\link{bold_seqspec}} - Get sequence and specimen data together. #' \item \code{\link{bold_trace}} - Get trace files - saves to disk. #' \item \code{\link{read_trace}} - Read trace files into R. #' \item \code{\link{bold_tax_name}} - Get taxonomic names via input names. #' \item \code{\link{bold_tax_id}} - Get taxonomic names via BOLD identifiers. #' \item \code{\link{bold_identify}} - Search for match given a COI sequence. #' } #' #' Interestingly, they provide xml and tsv format data for the specimen data, #' while they provide fasta data format for the sequence data. So for the #' specimen data you can get back raw XML, or a data frame parsed from the #' tsv data, while for sequence data you get back a list (b/c sequences are #' quite long and would make a data frame unwieldy). #' #' @importFrom crul HttpClient url_build #' @importFrom xml2 read_xml xml_find_all xml_find_first xml_text #' xml_name as_list #' @importFrom jsonlite fromJSON #' @importFrom reshape sort_df #' @importFrom plyr rbind.fill #' @docType package #' @name bold-package #' @aliases bold NULL #' List of 3 nucleotide sequences to use in examples for the #' \code{\link{bold_identify}} function #' #' @details Each sequence is a character string, of lengths 410, 600, and 696. #' @name sequences #' @docType data #' @keywords data NULL bold/R/bold_specimens.R0000644000176200001440000000425513134217620014511 0ustar liggesusers#' Search BOLD for specimens. 
#'
#' @export
#' @template args
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param format (character) One of tsv (default) or xml. tsv format gives
#' back a data.frame object. xml gives back parsed XML as an
#' \code{xml_document} object. 'json' (JavaScript Object Notation) and 'dwc'
#' (Darwin Core Archive) are supported by the API in theory, but the JSON can
#' be malformed and the DWC option actually returns TSV, so neither is
#' accepted here.
#'
#' @return When \code{response = TRUE}, the HTTP response object exactly as
#' returned by the request helper. Otherwise a data.frame
#' (\code{format = 'tsv'}) or an \code{xml_document} (\code{format = 'xml'}).
#'
#' @examples \dontrun{
#' bold_specimens(taxon='Osmia')
#' bold_specimens(taxon='Osmia', format='xml')
#' bold_specimens(taxon='Osmia', response=TRUE)
#' res <- bold_specimens(taxon='Osmia', format='xml', response=TRUE)
#' res$url
#' res$status_code
#' res$response_headers
#'
#' # More than 1 can be given for all search parameters
#' bold_specimens(taxon=c('Coelioxys','Osmia'))
#'
#' ## curl debugging
#' ### These examples below take a long time, so you can set a timeout so that
#' ### it stops by X sec
#' head(bold_specimens(taxon='Osmia', verbose = TRUE))
#' # head(bold_specimens(geo='Costa Rica', timeout_ms = 6))
#' }
bold_specimens <- function(taxon = NULL, ids = NULL, bin = NULL,
  container = NULL, institutions = NULL, researchers = NULL, geo = NULL,
  response=FALSE, format = 'tsv', ...) {

  # Only the two formats that can be parsed below are accepted; anything else
  # is rejected by match.arg() before a request is made.
  format <- match.arg(format, choices = c('xml', 'tsv'))
  # NOTE(review): pipeornull()/bc() are defined elsewhere in the package;
  # presumably they collapse multiple values and drop NULL entries - confirm.
  args <- bc(list(taxon=pipeornull(taxon), geo=pipeornull(geo),
    ids=pipeornull(ids), bin=pipeornull(bin), container=pipeornull(container),
    institutions=pipeornull(institutions),
    researchers=pipeornull(researchers), format = format))
  # The package tests expect a "must provide a non-empty value" error here
  # when none of the search parameters is supplied.
  check_args_given_nonempty(args, c('taxon','ids','bin','container',
    'institutions','researchers','geo'))
  out <- b_GET(paste0(bbase(), 'API_Public/specimen'), args, ...)
  if (response) {
    out
  } else {
    tt <- out$parse("UTF-8")
    switch(format,
      xml = xml2::read_xml(tt),
      tsv = utils::read.delim(text = tt, header = TRUE, sep = "\t",
        stringsAsFactors = FALSE)
    )
  }
}
bold/R/bold_identify.R0000644000176200001440000000752013134220464014334 0ustar liggesusers#' Search for matches to sequences against the BOLD COI database.
#'
#' @export
#'
#' @param sequences (character) One or more nucleotide sequences to identify.
#' Each element is sent to BOLD as a separate query. Required.
#' @param db (character) The database to match against, one of COX1,
#' COX1_SPECIES, COX1_SPECIES_PUBLIC, or COX1_L640bp. See the
#' "db parameter options" section for more information.
#' @param response (logical) Note that response is the object that returns
#' from the Curl call, useful for debugging, and getting detailed info on
#' the API call.
#' @param ... Further args passed on to \code{\link[crul]{HttpClient}}, main
#' purpose being curl debugging
#'
#' @section db parameter options:
#' \itemize{
#' \item COX1 Every COI barcode record on BOLD with a minimum sequence
#' length of 500bp (warning: unvalidated library and includes records without
#' species level identification). This includes many species represented by
#' only one or two specimens as well as all species with interim taxonomy. This
#' search only returns a list of the nearest matches and does not provide a
#' probability of placement to a taxon.
#' \item COX1_SPECIES Every COI barcode record with a species level
#' identification and a minimum sequence length of 500bp. This includes
#' many species represented by only one or two specimens as well as all
#' species with interim taxonomy.
#' \item COX1_SPECIES_PUBLIC All published COI records from BOLD and GenBank
#' with a minimum sequence length of 500bp. This library is a collection of
#' records from the published projects section of BOLD.
#' \item COX1_L640bp Subset of the Species library with a minimum sequence
#' length of 640bp and containing both public and private records.
#' This library is intended for short sequence identification as it
#' provides maximum overlap with short reads from the barcode region of COI.
#' }
#'
#' @section Named outputs:
#' To maintain names on the output list of data make sure to pass in a
#' named list to the \code{sequences} parameter. You can for example,
#' take a list of sequences, and use \code{\link{setNames}} to set names.
#'
#' @return A list with one element per element of \code{sequences}. Each
#' element is a data.frame with details for each specimen matched, or
#' \code{NULL} if nothing was matched. When \code{response = TRUE} each
#' element is the HTTP response object instead.
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=idengine}
#' @seealso \code{\link{bold_identify_parents}}
#' @examples \dontrun{
#' seq <- sequences$seq1
#' res <- bold_identify(sequences=seq)
#' head(res[[1]])
#' head(bold_identify(sequences=seq, db='COX1_SPECIES')[[1]])
#' }
bold_identify <- function(sequences, db = 'COX1', response=FALSE, ...) {
  # Run one ID-engine query: `a` is a single sequence, `b` the database name.
  # `response` and `...` are taken from the enclosing call.
  foo <- function(a, b){
    args <- bc(list(sequence = a, db = b))
    cli <- crul::HttpClient$new(url = paste0(bbase(), 'Ids_xml'))
    out <- cli$get(query = args, ...)
    out$raise_for_status()
    if (response) {
      # Hand back the raw response untouched so callers can inspect it
      out
    } else {
      # crul stores headers in `response_headers`; the former
      # `out$headers` lookup was always NULL, so the content-type guard
      # never fired. Only complain when a header is present and is not XML.
      ctype <- out$response_headers$`content-type`
      if (!is.null(ctype) && !any(grepl("xml", ctype, ignore.case = TRUE))) {
        stop("expected an XML response from BOLD but got content-type '",
          paste(ctype, collapse = ", "), "'", call. = FALSE)
      }
      tt <- out$parse('UTF-8')
      xml <- xml2::read_xml(tt)
      nodes <- xml2::xml_find_all(xml, "//match")
      toget <- c("ID","sequencedescription","database",
                 "citation","taxonomicidentification","similarity")
      # One single-row data.frame per <match> node
      outlist <- lapply(nodes, function(x){
        tmp2 <- vapply(toget, function(y) {
          tmp <- xml2::xml_find_first(x, y)
          stats::setNames(xml2::xml_text(tmp), xml2::xml_name(tmp))
        }, "")
        spectmp <- xml2::as_list(xml2::xml_find_first(x, "specimen"))
        spectmp <- unnest(spectmp)
        names(spectmp) <- c('specimen_url','specimen_country',
                            'specimen_lat','specimen_lon')
        # Empty specimen fields come back as NULL; data.frame() needs NA
        spectmp[vapply(spectmp, is.null, logical(1))] <- NA
        data.frame(c(tmp2, spectmp), stringsAsFactors = FALSE)
      })
      do.call(rbind.fill, outlist)
    }
  }
  lapply(sequences, foo, b = db)
}

# Recursively flatten a nested list: an unnamed (leaf) list is collapsed to a
# one-element list holding its unlisted values; a named list is flattened by
# concatenating the results for each of its elements.
unnest <- function(x){
  if (is.null(names(x))) {
    list(unname(unlist(x)))
  } else {
    do.call("c", lapply(x, unnest))
  }
}
bold/R/bold_trace.R0000644000176200001440000000625313134226522013622 0ustar liggesusers#' Get BOLD trace files
#'
#' @export
#' @template args
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param marker (character) Returns all records containing matching
#' marker codes.
#' @param dest (character) A directory to write the files to
#' @param overwrite (logical) Overwrite existing directory and file?
#' @param progress (logical) Print progress or not. NOT AVAILABLE FOR NOW.
#' HOPEFULLY WILL RETURN SOON.
#' @param ... Further args passed on to \code{\link[crul]{HttpClient}}
#' @param x Object to print or read.
#'
#' @examples \dontrun{
#' # Use a specific destination directory
#' bold_trace(taxon='Bombus', geo='Alaska', dest="~/mytarfiles")
#'
#' # Another example
#' # bold_trace(ids='ACRJP618-11', dest="~/mytarfiles")
#' # bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles")
#'
#' # read file in
#' x <- bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles")
#' (res <- read_trace(x$ab1[2]))
#'
#' # The progress dialog is pretty verbose, so quiet=TRUE is a nice touch,
#' # but not by default
#' # Beware, this one takes a while
#' # x <- bold_trace(taxon='Osmia', quiet=TRUE)
#'
#' if (requireNamespace("sangerseqR", quietly = TRUE)) {
#'  library("sangerseqR")
#'  primarySeq(res)
#'  secondarySeq(res)
#'  head(traceMatrix(res))
#' }
#' }
bold_trace <- function(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
  institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
  dest=NULL, overwrite = TRUE, progress = TRUE, ...) {

  # NOTE(review): `overwrite` and `progress` are accepted for interface
  # compatibility but are not consulted anywhere in this body.
  if (!requireNamespace("sangerseqR", quietly = TRUE)) {
    stop("Please install sangerseqR", call. = FALSE)
  }

  args <- bc(list(taxon=pipeornull(taxon), geo=pipeornull(geo),
    ids=pipeornull(ids), bin=pipeornull(bin), container=pipeornull(container),
    institutions=pipeornull(institutions),
    researchers=pipeornull(researchers), marker=pipeornull(marker)))
  url <- crul::url_build(paste0(bbase(), 'API_Public/trace'), query = args)
  # Default location is the working directory; otherwise the tar file is
  # placed inside the (tilde-expanded) destination directory.
  if (is.null(dest)) {
    destfile <- file.path(getwd(), "bold_trace_files.tar")
    destdir <- file.path(getwd(), "bold_trace_files")
  } else {
    destdir <- path.expand(dest)
    destfile <- file.path(destdir, "bold_trace_files.tar")
  }
  dir.create(destdir, showWarnings = FALSE, recursive = TRUE)
  if (!file.exists(destfile)) file.create(destfile, showWarnings = FALSE)
  cli <- crul::HttpClient$new(url = url)
  res <- cli$get(disk = destfile, ...)
  # Fail on an HTTP error instead of trying to untar an error page
  res$raise_for_status()
  utils::untar(destfile, exdir = destdir)
  # Escape the dot and anchor the extension so only *.ab1 files are listed
  ab1 <- list.files(destdir, pattern = "[.]ab1$", full.names = TRUE)
  structure(list(destfile = destfile, destdir = destdir, ab1 = ab1,
    args = args), class = "boldtrace")
}

#' @export
print.boldtrace <- function(x, ...){
  cat("\n", "\n\n")
  # head() copes with zero files; 1:min(10, 0) would index c(1, 0) and
  # print a spurious NA
  ff <- utils::head(x$ab1, 10)
  if (length(ff) < length(x$ab1)) ff <- c(ff, "...")
  cat(ff, sep = "\n")
  invisible(x)
}

#' @export
#' @rdname bold_trace
read_trace <- function(x){
  if (inherits(x, "boldtrace")) {
    # A boldtrace object may list many files; only a single path can be read
    if (length(x$ab1) > 1) {
      stop("Number of paths > 1, just pass one in", call. = FALSE)
    }
    sangerseqR::readsangerseq(x$ab1)
  } else {
    sangerseqR::readsangerseq(x)
  }
}
bold/R/bold_filter.R0000644000176200001440000000356413134226477014024 0ustar liggesusers#' Filter BOLD specimen + sequence data to one sequence per group.
#'
#' Keeps, for every unique value of the \code{by} column, the row with the
#' longest or the shortest sequence in the \code{nucleotides} column.
#'
#' @export
#' @param x (data.frame) a data.frame, as returned from
#' \code{\link{bold_seqspec}}. Note that some combinations of parameters
#' in \code{\link{bold_seqspec}} don't return a data.frame. Stops with
#' error message if this is not a data.frame. Required.
#' @param by (character) the column by which to group. For example,
#' if you want the longest sequence for each unique species name, then
#' pass \strong{species_name}. If the column doesn't exist, error
#' with message saying so. Required.
#' @param how (character) one of "max" or "min", which get used as
#' \code{which.max} or \code{which.min} to get the longest or shortest
#' sequence, respectively. Note that we remove gap/alignment characters
#' (\code{-})
#' @return a tibble/data.frame
#' @examples \dontrun{
#' res <- bold_seqspec(taxon='Osmia')
#' maxx <- bold_filter(res, by = "species_name")
#' minn <- bold_filter(res, by = "species_name", how = "min")
#'
#' vapply(maxx$nucleotides, nchar, 1, USE.NAMES = FALSE)
#' vapply(minn$nucleotides, nchar, 1, USE.NAMES = FALSE)
#' }
bold_filter <- function(x, by, how = "max") {
  if (!inherits(x, "data.frame")) {
    stop("'x' must be a data.frame", call. = FALSE)
  }
  if (length(how) != 1 || !how %in% c("min", "max")) {
    stop("'how' must be one of 'min' or 'max'", call. = FALSE)
  }
  if (!by %in% names(x)) {
    stop(sprintf("'%s' is not a valid column in 'x'", by), call. = FALSE)
  }
  # Direct dispatch instead of eval(parse(text = ...))
  pick <- switch(how, max = which.max, min = which.min)
  xsp <- split(x, x[[by]])
  tibble::as_tibble(setrbind(lapply(xsp, function(z) {
    # Sequence length with gap/alignment characters removed
    lgts <- nchar(gsub("-", "", z$nucleotides, fixed = TRUE))
    z[pick(lgts), ]
  })))
}

# Row-bind a list of data.frames (filling missing columns) and return the
# result visibly as a plain data.frame.
setrbind <- function(x) {
  out <- data.table::setDF(
    data.table::rbindlist(x, fill = TRUE, use.names = TRUE))
  out
}
bold/R/bold_tax_id.R0000644000176200001440000000414413134220021013755 0ustar liggesusers#' Search BOLD for taxonomy data by BOLD ID.
#'
#' @export
#' @param id (integer) One or more BOLD taxonomic identifiers. required.
#' @param dataTypes (character) Specifies the datatypes that will be
#' returned. 'all' returns all data. 'basic' returns basic taxon information.
#' 'images' returns specimen images.
#' @param includeTree (logical) If TRUE (default: FALSE), returns a list
#' containing information for parent taxa as well as the specified taxon.
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy}
#' @seealso \code{\link{bold_tax_name}}
#' @examples \dontrun{
#' bold_tax_id(id=88899)
#' bold_tax_id(id=88899, includeTree=TRUE)
#' bold_tax_id(id=88899, includeTree=TRUE, dataTypes = "stats")
#' bold_tax_id(id=c(88899,125295))
#'
#' ## dataTypes parameter
#' bold_tax_id(id=88899, dataTypes = "basic")
#' bold_tax_id(id=88899, dataTypes = "stats")
#' bold_tax_id(id=88899, dataTypes = "images")
#' bold_tax_id(id=88899, dataTypes = "geo")
#' bold_tax_id(id=88899, dataTypes = "sequencinglabs")
#' bold_tax_id(id=88899, dataTypes = "depository")
#' bold_tax_id(id=c(88899,125295), dataTypes = "geo")
#' bold_tax_id(id=c(88899,125295), dataTypes = "images")
#'
#' ## Passing in NA
#' bold_tax_id(id = NA)
#' bold_tax_id(id = c(88899,125295,NA))
#'
#' ## get http response object only
#' bold_tax_id(id=88899, response=TRUE)
#' bold_tax_id(id=c(88899,125295), response=TRUE)
#'
#' ## curl debugging
#' bold_tax_id(id=88899, verbose = TRUE)
#' }
bold_tax_id <- function(id, dataTypes = 'basic', includeTree = FALSE,
                        response = FALSE, ...) {
  endpoint <- paste0(bbase(), "API_Tax/TaxonData")
  # one request per identifier; NULL entries are dropped from the query
  fetch_one <- function(one_id) {
    query <- bc(list(
      taxId = one_id,
      dataTypes = dataTypes,
      includeTree = if (includeTree) 'true' else NULL
    ))
    get_response(args = query, url = endpoint, ...)
  }
  responses <- lapply(id, fetch_one)
  if (response) return(responses)

  parsed <- Map(process_response, x = responses, y = id,
                z = includeTree, w = dataTypes)
  res <- do.call(rbind.fill, parsed)
  # a single column means only `input` came back, i.e. nothing was found
  if (NCOL(res) == 1) res$noresults <- NA
  res
}
bold/R/bold_seqspec.R0000644000176200001440000000653613134217664014202 0ustar liggesusers
#' Get BOLD specimen + sequence data.
#'
#' @export
#' @template args
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param marker (character) Returns all records containing matching marker
#' codes. See the Marker section.
#' @param format (character) One of xml or tsv (default). tsv format gives
#' back a data.frame object. xml gives back parsed xml as an
#' \code{xml_document} object.
#' @param sepfasta (logical) If \code{TRUE}, the fasta data is separated into
#' a list with names matching the processid's from the data frame.
#' Default: \code{FALSE}
#'
#' @return Either a data.frame, parsed xml, a http response object, or a list
#' with length two (a data.frame w/o nucleotide data, and a list with
#' nucleotide data)
#'
#' @section Marker:
#' Notes from BOLD on the \code{marker} param:
#' "All markers for a specimen matching the search string will be returned.
#' ie. A record with COI-5P and ITS will return sequence data for both
#' markers even if only COI-5P was specified."
#'
#' You will likely end up with data with markers that you did not request -
#' just be sure to filter those out as needed.
#'
#' @examples \dontrun{
#' bold_seqspec(taxon='Osmia')
#' bold_seqspec(taxon='Osmia', format='xml')
#' bold_seqspec(taxon='Osmia', response=TRUE)
#' res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE)
#' res$fasta[1:2]
#' res$fasta['GBAH0293-06']
#'
#' # records that match a marker name
#' res <- bold_seqspec(taxon="Melanogrammus aeglefinus", marker="COI-5P")
#'
#' # records that match a geographic locality
#' res <- bold_seqspec(taxon="Melanogrammus aeglefinus", geo="Canada")
#'
#' ## curl debugging
#' ### You can do many things, including get verbose output on the curl call,
#' ### and set a timeout
#' head(bold_seqspec(taxon='Osmia', verbose = TRUE))
#' ## timeout
#' # head(bold_seqspec(taxon='Osmia', timeout_ms = 1))
#' }
bold_seqspec <- function(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
  institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
  response=FALSE, format = 'tsv', sepfasta=FALSE, ...) {

  format <- match.arg(format, choices = c('xml', 'tsv'))
  query <- bc(list(
    taxon = pipeornull(taxon),
    geo = pipeornull(geo),
    ids = pipeornull(ids),
    bin = pipeornull(bin),
    container = pipeornull(container),
    institutions = pipeornull(institutions),
    researchers = pipeornull(researchers),
    marker = pipeornull(marker),
    combined_download = format
  ))
  check_args_given_nonempty(query, c('taxon', 'ids', 'bin', 'container',
                                     'institutions', 'researchers', 'geo',
                                     'marker'))
  resp <- b_GET(paste0(bbase(), 'API_Public/combined'), query, ...)
  if (response) return(resp)

  body <- paste0(rawToChar(resp$content, multiple = TRUE), collapse = "")
  # an empty body means no records; signalled to the caller as NA
  if (body == "") return(NA)

  parsed <- if (format == "xml") {
    xml2::read_xml(body)
  } else {
    utils::read.delim(text = body, header = TRUE, sep = "\t",
                      stringsAsFactors = FALSE)
  }
  # sequences can only be split out of the tabular form
  if (!sepfasta || format != "tsv") return(parsed)

  seqs <- stats::setNames(as.list(parsed$nucleotides), parsed$processid)
  specimens <- parsed[ , !names(parsed) %in% "nucleotides" ]
  list(data = specimens, fasta = seqs)
}
bold/R/bold_stats.R0000644000176200001440000000375113134224120013652 0ustar liggesusers
#' Get BOLD stats
#'
#' @export
#' @inheritParams bold_specimens
#' @param dataType (character) one of "overview" or "drill_down" (default).
#' "drill_down": a detailed summary of information which provides record
#' counts by [BINs, Country, Storing Institution, Species]. "overview":
#' the total counts of [BINs, Countries, Storing Institutions, Orders,
#' Families, Genus, Species]
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @examples \dontrun{
#' x <- bold_stats(taxon='Osmia')
#' x$total_records
#' x$records_with_species_name
#' x$bins
#' x$countries
#' x$depositories
#' x$order
#' x$family
#' x$genus
#' x$species
#'
#' # just get all counts
#' lapply(Filter(is.list, x), "[[", "count")
#'
#' res <- bold_stats(taxon='Osmia', response=TRUE)
#' res$url
#' res$status_code
#' res$response_headers
#'
#' # More than 1 can be given for all search parameters
#' bold_stats(taxon=c('Coelioxys','Osmia'))
#'
#' ## curl debugging
#' ### These examples below take a long time, so you can set a timeout so that
#' ### it stops by X sec
#' bold_stats(taxon='Osmia', verbose = TRUE)
#' # bold_stats(geo='Costa Rica', timeout_ms = 6)
#' }
bold_stats <- function(taxon = NULL, ids = NULL, bin = NULL,
  container = NULL, institutions = NULL, researchers = NULL, geo = NULL,
  dataType = "drill_down", response=FALSE, ...) {

  query <- bc(list(
    taxon = pipeornull(taxon),
    geo = pipeornull(geo),
    ids = pipeornull(ids),
    bin = pipeornull(bin),
    container = pipeornull(container),
    institutions = pipeornull(institutions),
    researchers = pipeornull(researchers),
    dataType = dataType,
    format = "json"
  ))
  check_args_given_nonempty(query, c('taxon','ids','bin','container',
                                     'institutions','researchers','geo'))
  resp <- b_GET(paste0(bbase(), 'API_Public/stats'), query, ...)
  if (response) {
    resp
  } else {
    jsonlite::fromJSON(resp$parse("UTF-8"))
  }
}
bold/R/bold_tax_name.R0000644000176200001440000000407713134212564014323 0ustar liggesusers
#' Search BOLD for taxonomy data by taxonomic name
#'
#' @export
#' @param name (character) One or more scientific names. required.
#' @param fuzzy (logical) Whether to use fuzzy search or not (default: FALSE).
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy}
#' @details The \code{dataTypes} parameter is not supported in this function.
#' If you want to use that parameter, get an ID from this function and pass
#' it into \code{bold_tax_id}, and then use the \code{dataTypes} parameter.
#' @seealso \code{\link{bold_tax_id}}
#' @examples \dontrun{
#' bold_tax_name(name='Diplura')
#' bold_tax_name(name='Osmia')
#' bold_tax_name(name=c('Diplura','Osmia'))
#' bold_tax_name(name=c("Apis","Puma concolor","Pinus concolor"))
#' bold_tax_name(name='Diplur', fuzzy=TRUE)
#' bold_tax_name(name='Osm', fuzzy=TRUE)
#'
#' ## get http response object only
#' bold_tax_name(name='Diplura', response=TRUE)
#' bold_tax_name(name=c('Diplura','Osmia'), response=TRUE)
#'
#' ## Names with no data in BOLD database
#' bold_tax_name("Nasiaeshna pentacantha")
#' bold_tax_name(name = "Cordulegaster erronea")
#' bold_tax_name(name = "Cordulegaster erronea", response=TRUE)
#'
#' ## curl debugging
#' bold_tax_name(name='Diplura', verbose = TRUE)
#' }
bold_tax_name <- function(name, fuzzy = FALSE, response = FALSE, ...) {
  # one request per name; `fuzzy` is only sent when requested
  tmp <- lapply(name, function(x)
    get_response(bc(list(taxName = x, fuzzy = if (fuzzy) 'true' else NULL)),
                 url = paste0(bbase(), "API_Tax/TaxonSearch"), ...)
  )
  if (response) {
    tmp
  } else {
    # outer parentheses make the result visible (setDF returns invisibly)
    (data.table::setDF(data.table::rbindlist(
      Map(process_tax_name, tmp, name), use.names = TRUE, fill = TRUE)
    ))
  }
}

# Turn one TaxonSearch response into a data.frame.
#   x: response object with `status_code` and raw `content`
#   y: the name that was searched for, stored in the `input` column
# A failed request or an empty result yields a one-column data.frame holding
# only `input`, so every name keeps a row after row-binding.
process_tax_name <- function(x, y) {
  no_match <- data.frame(input = y, stringsAsFactors = FALSE)
  # Decide on the status code before parsing. The previous sentinel test
  # `out == "stop"` compared the parsed list with a string inside `||`,
  # which is an error in R >= 4.3 once the list has more than one element.
  if (x$status_code > 202) return(no_match)
  out <- jsonlite::fromJSON(rawToChar(x$content), flatten = TRUE)
  if (length(out) == 0 || identical(out[[1]], list())) return(no_match)
  data.frame(out$top_matched_names, input = y, stringsAsFactors = FALSE)
}
bold/R/bold_identify_parents.R0000644000176200001440000000660513134226537016102 0ustar liggesusers
#' Add taxonomic parent names to a data.frame
#'
#' @export
#' @param x (data.frame/list) list of data.frames - the output from a call to
#' \code{\link{bold_identify}}. or a single data.frame from the output from
#' same. required.
#' @param wide (logical) output in long or wide format. See Details.
#' Default: \code{FALSE}
#'
#' @details This function gets unique set of taxonomic names from the input
#' data.frame, then queries \code{\link{bold_tax_name}} to get the
#' taxonomic ID, passing it to \code{\link{bold_tax_id}} to get the parent
#' names, then attaches those to the input data.
#'
#' Records in the input data that do not have matches for parent names
#' simply get NA values in the added columns.
#'
#' @section wide vs long format:
#' When \code{wide = FALSE} you get many rows for each record. Essentially,
#' we \code{cbind} the taxonomic classification onto the one row from the
#' result of \code{\link{bold_identify}}, giving as many rows as there are
#' taxa in the taxonomic classification.
#'
#' When \code{wide = TRUE} you get one row for each record - thus the
#' dimensions of the input data stay the same. For this option, we take just
#' the rows for taxonomic ID and name for each taxon in the taxonomic
#' classification, and name the columns by the taxon rank, so you get
#' \code{phylum} and \code{phylum_id}, and so on.
#'
#' @return a list of the same length as the input
#'
#' @examples \dontrun{
#' df <- bold_identify(sequences = sequences$seq2)
#'
#' # long format
#' out <- bold_identify_parents(df)
#' str(out)
#' head(out[[1]])
#'
#' # wide format
#' out <- bold_identify_parents(df, wide = TRUE)
#' str(out)
#' head(out[[1]])
#' }
bold_identify_parents <- function(x, wide = FALSE) {
  UseMethod("bold_identify_parents")
}

#' @export
bold_identify_parents.default <- function(x, wide = FALSE) {
  # report only the first class so multi-class objects give a readable message
  stop("no 'bold_identify_parents' method for ", class(x)[1L], call. = FALSE)
}

#' @export
bold_identify_parents.data.frame <- function(x, wide = FALSE) {
  bold_identify_parents(list(x), wide)
}

#' @export
bold_identify_parents.list <- function(x, wide = FALSE) {
  # get unique set of names
  uniqnms <-
    unique(unname(unlist(lapply(x, function(z) z$taxonomicidentification))))
  if (is.null(uniqnms)) {
    stop("no fields 'taxonomicidentification' found in input", call. = FALSE)
  }

  # get parent names via bold_tax_name and bold_tax_id
  out <- stats::setNames(lapply(uniqnms, function(w) {
    tmp <- bold_tax_name(w)
    if (is.null(tmp$taxid)) return(NULL)
    tmp2 <- bold_tax_id(tmp$taxid, includeTree = TRUE)
    tmp2$input <- NULL
    tmp2
  }), uniqnms)
  # remove length zero elements
  out <- bc(out)

  if (wide) {
    # replace each data.frame with a wide version with just
    # taxid and taxon name (with col names with rank name).
    # Done once here: the result does not depend on the input element, so
    # recomputing it for every element of `x` was wasted work.
    out <- lapply(out, function(h) do.call("cbind", (apply(h, 1, function(x) {
      tmp <- as.list(x[c('taxid', 'taxon')])
      tmp$taxid <- as.numeric(tmp$taxid)
      data.frame(stats::setNames(tmp, paste0(x['tax_rank'], c('_id', ''))),
                 stringsAsFactors = FALSE)
    }))))
  }

  # apply parent names to input data
  lapply(x, function(z) {
    zsplit <- split(z, z$ID)
    setrbind(
      bc(lapply(zsplit, function(w) {
        tmp <- out[names(out) %in% w$taxonomicidentification]
        # no parent data for this record: return it untouched
        if (!length(tmp)) return(w)
        suppressWarnings(cbind(w, tmp[[1]]))
      }))
    )
  })
}
bold/R/zzz.R0000644000176200001440000000527413134220517012361 0ustar liggesusers
# Base URL shared by all BOLD API endpoints
bbase <- function() 'http://v4.boldsystems.org/index.php/'

# Drop NULL elements from a list
bc <- function(x) Filter(Negate(is.null), x)

# Parse one FASTA record (leading ">" already removed by the caller) into
# id, name, gene and sequence.
# NOTE(review): assumes the header fields are ordered id|name|marker, as in
# BOLD FASTA downloads - confirm against the API documentation.
split_fasta <- function(x){
  rec <- x[[1]]
  # the header is everything before the first newline, the rest is sequence
  nl <- regexpr("\n", rec, fixed = TRUE)
  if (nl < 0) {
    header <- rec
    seq <- ""
  } else {
    header <- substr(rec, 1, nl - 1)
    seq <- gsub("[\r\n]", "", substr(rec, nl + 1, nchar(rec)))
  }
  header <- sub("\r$", "", header)
  # split only the header so sequence text can never leak into a field
  stuff <- strsplit(header, "|", fixed = TRUE)[[1]][1:3]
  # gene is the third header field (it previously repeated the id)
  list(id = stuff[1], name = stuff[2], gene = stuff[3], sequence = seq)
}

# Collapse a vector into a single "|"-separated string; NULL stays NULL so
# that bc() can drop the parameter from the query.
pipeornull <- function(x){
  if (is.null(x)) NULL else paste0(x, collapse = "|")
}

# Stop unless at least one of the parameters named in `x` is present in
# `arguments` and no supplied argument is an empty string.
# `combined_download` carries the output format and is not checked.
check_args_given_nonempty <- function(arguments, x){
  msg <- sprintf("You must provide a non-empty value to at least one of\n %s",
                 paste0(x, collapse = "\n "))
  if (!any(x %in% names(arguments))) stop(msg)
  arguments_noformat <- arguments[ !names(arguments) %in% 'combined_download' ]
  argslengths <- vapply(arguments_noformat, nchar, numeric(1),
                        USE.NAMES = FALSE)
  if (any(argslengths == 0)) stop(msg)
}

# Turn one TaxonData response into a data.frame.
#   x: response object with `status_code` and raw `content`
#   y: the identifier that was requested, stored in the `input` column
#   z: includeTree flag (accepted for Map() compatibility; not used here)
#   w: the dataTypes value that was requested
process_response <- function(x, y, z, w){
  no_result <- data.frame(input = y, stringsAsFactors = FALSE)
  # Decide on the status code before parsing. The previous sentinel test
  # `out == "stop"` compared the parsed list with a string inside `||`,
  # which is an error in R >= 4.3 once the list has more than one element.
  if (x$status_code > 202) return(no_result)
  out <- jsonlite::fromJSON(rawToChar(x$content))
  if (length(out) == 0 || identical(out[[1]], list())) return(no_result)

  # these data types wrap their payload in a single outer element
  if (w %in% c("stats",'images','geo','sequencinglabs','depository')) {
    out <- out[[1]]
  }
  # all-numeric names are dropped so that each element becomes its own row
  trynames <- tryCatch(as.numeric(names(out)), warning = function(w) w)
  if (!inherits(trynames, "simpleWarning")) names(out) <- NULL
  # remove empty fields, which data.frame() cannot hold
  if (any(vapply(out, function(x) is.list(x) && length(x) > 0, logical(1)))) {
    out <- lapply(out, function(x) Filter(length, x))
  } else {
    out <- Filter(length, out)
  }
  if (!is.null(names(out))) {
    df <- data.frame(out, stringsAsFactors = FALSE)
  } else {
    df <- do.call(rbind.fill,
                  lapply(out, data.frame, stringsAsFactors = FALSE))
  }
  row.names(df) <- NULL
  if ("parentid" %in% names(df)) df <- sort_df(df, "parentid")
  row.names(df) <- NULL
  data.frame(input = y, df, stringsAsFactors = FALSE)
}

# GET helper for the taxonomy endpoints: errors on HTTP failure and returns
# the response object.
get_response <- function(args, url, ...){
  cli <- crul::HttpClient$new(url = url)
  out <- cli$get(query = args, ...)
  out$raise_for_status()
  # NOTE(review): b_GET() below reads `out$response_headers`, while this line
  # reads `out$headers`. If the response object has no `headers` field this
  # comparison is empty and stopifnot() passes silently - confirm which
  # field is intended before relying on this check.
  stopifnot(out$headers$`content-type` == 'text/html; charset=utf-8')
  return(out)
}

# GET helper for the public endpoints: errors on HTTP failure, and also when
# the server answers with an HTML page, whose text becomes the error message.
b_GET <- function(url, args, ...){
  cli <- crul::HttpClient$new(url = url)
  out <- cli$get(query = args, ...)
  out$raise_for_status()
  if (grepl("html", out$response_headers$`content-type`)) {
    stop(out$parse("UTF-8"))
  }
  return(out)
}
bold/R/bold_seq.R0000644000176200001440000000427713134212526013317 0ustar liggesusers
#' Search BOLD for sequences.
#'
#' Get sequences for a taxonomic name, id, bin, container, institution,
#' researcher, geographic place, or gene.
#'
#' @importFrom stringr str_replace_all str_replace str_split
#' @export
#' @template args
#' @template otherargs
#' @references
#' \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices}
#'
#' @param marker (character) Returns all records containing matching
#' marker codes.
#'
#' @return A list with each element of length 4 with slots for id, name,
#' gene, and sequence.
#'
#' @examples \dontrun{
#' res <- bold_seq(taxon='Coelioxys')
#' bold_seq(taxon='Aglae')
#' bold_seq(taxon=c('Coelioxys','Osmia'))
#' bold_seq(ids='ACRJP618-11')
#' bold_seq(ids=c('ACRJP618-11','ACRJP619-11'))
#' bold_seq(bin='BOLD:AAA5125')
#' bold_seq(container='ACRJP')
#' bold_seq(researchers='Thibaud Decaens')
#' bold_seq(geo='Ireland')
#' bold_seq(geo=c('Ireland','Denmark'))
#'
#' # Return the http response object for detailed Curl call response details
#' res <- bold_seq(taxon='Coelioxys', response=TRUE)
#' res$url
#' res$status_code
#' res$response_headers
#'
#' ## curl debugging
#' ### You can do many things, including get verbose output on the curl
#' ### call, and set a timeout
#' bold_seq(taxon='Coelioxys', verbose = TRUE)[1:2]
#' # bold_seq(taxon='Coelioxys', timeout_ms = 10)
#' }
bold_seq <- function(taxon = NULL, ids = NULL, bin = NULL, container = NULL,
  institutions = NULL, researchers = NULL, geo = NULL, marker = NULL,
  response=FALSE, ...) {

  query <- bc(list(
    taxon = pipeornull(taxon),
    geo = pipeornull(geo),
    ids = pipeornull(ids),
    bin = pipeornull(bin),
    container = pipeornull(container),
    institutions = pipeornull(institutions),
    researchers = pipeornull(researchers),
    marker = pipeornull(marker)
  ))
  check_args_given_nonempty(
    query,
    c('taxon','ids','bin','container','institutions','researchers',
      'geo','marker')
  )
  resp <- b_GET(paste0(bbase(), 'API_Public/sequence'), query, ...)
  if (response) return(resp)

  # every record starts with ">"; the piece before the first one is dropped
  records <- strsplit(resp$parse("UTF-8"), ">")[[1]][-1]
  lapply(records, split_fasta)
}
bold/vignettes/0000755000176200001440000000000013134420302013172 5ustar liggesusersbold/vignettes/bold_vignette.Rmd0000644000176200001440000004774713134226657016527 0ustar liggesusers

`bold` is an R package to connect to [BOLD Systems](http://www.boldsystems.org/) via their API. Functions in `bold` let you search for sequence data, specimen data, sequence + specimen data, and download raw trace files.
### bold info

+ [BOLD home page](http://boldsystems.org/)
+ [BOLD API docs](http://v4.boldsystems.org/index.php/api_home)

### Using bold

**Install**

Install `bold` from CRAN

```r
install.packages("bold")
```

Or install the development version from GitHub

```r
devtools::install_github("ropensci/bold")
```

Load the package

```r
library("bold")
```

### Search for taxonomic names via names

`bold_tax_name` searches for names with names.

```r
bold_tax_name(name = 'Diplura')
#>     input  taxid   taxon tax_rank tax_division parentid       parentname
#> 1 Diplura 591238 Diplura    order      Animals       82          Insecta
#> 2 Diplura 603673 Diplura    genus     Protists    53974 Scytosiphonaceae
#>   taxonrep
#> 1  Diplura
#> 2     <NA>
```

```r
bold_tax_name(name = c('Diplura', 'Osmia'))
#>     input  taxid   taxon tax_rank tax_division parentid       parentname
#> 1 Diplura 591238 Diplura    order      Animals       82          Insecta
#> 2 Diplura 603673 Diplura    genus     Protists    53974 Scytosiphonaceae
#> 3   Osmia   4940   Osmia    genus      Animals     4962     Megachilinae
#>   taxonrep
#> 1  Diplura
#> 2     <NA>
#> 3    Osmia
```

### Search for taxonomic names via BOLD identifiers

`bold_tax_id` searches for names with BOLD identifiers.

```r
bold_tax_id(id = 88899)
#>   input taxid   taxon tax_rank tax_division parentid parentname
#> 1 88899 88899 Momotus    genus      Animals    88898  Momotidae
```

```r
bold_tax_id(id = c(88899, 125295))
#>    input  taxid      taxon tax_rank tax_division parentid parentname
#> 1  88899  88899    Momotus    genus      Animals    88898  Momotidae
#> 2 125295 125295 Helianthus    genus       Plants   100962 Asteraceae
```

### Search for sequence data only

The BOLD sequence API gives back sequence data, with a bit of metadata.
The default is to get a list back ```r bold_seq(taxon = 'Coelioxys')[1:2] #> [[1]] #> [[1]]$id #> [1] "FBAPB491-09" #> #> [[1]]$name #> [1] "Coelioxys conica" #> #> [[1]]$gene #> [1] "FBAPB491-09" #> #> [[1]]$sequence #> [1] "---------------------ACCTCTTTAAGAATAATTATTCGTATAGAAATAAGAATTCCAGGATCTTGAATTAATAATGATCAAATTTATAACTCCTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATACCTTTTCTTATTGGAGGATTTGGAAATTGATTAGTACCTTTAATATTAGGATCACCAGATATAGCTTTCCCACGAATAAATAATATTAGATTTTGATTATTACCTCCTTCTTTATTAATATTATTATTAAGTAATTTAATAAATCCCAGACCAGGAACAGGCTGAACAGTTTATCCTCCTTTATCTTTATACACATACCACCCTTCTCCCTCAGTTGATTTAGCAATTTTTTCACTACATCTATCAGGAATCTCTTCTATTATTGGATCTATAAATTTTATTGTTACAATTTTAATAATAAAAAACTTTTCAATAAATTATAATCAAATACCATTATTCCCATGATCTATTTTAATTACTACTATTTTATTATTATTATCACTACCTGTATTAGCTGGTGCTATTACTATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCTATAGGAGGAGGAGACCCAATTTTATACCAACATTTA" #> #> #> [[2]] #> [[2]]$id #> [1] "FBAPC351-10" #> #> [[2]]$name #> [1] "Coelioxys afra" #> #> [[2]]$gene #> [1] "FBAPC351-10" #> #> [[2]]$sequence #> [1] "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGAGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCATTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT" ``` You can optionally get back the `httr` response object ```r res <- bold_seq(taxon = 'Coelioxys', response = TRUE) res$headers #> $date #> [1] "Tue, 15 Sep 2015 20:02:31 GMT" #> #> $server #> [1] "Apache/2.2.15 (Red Hat)" #> #> $`x-powered-by` #> [1] "PHP/5.3.15" #> #> $`content-disposition` #> [1] "attachment; 
filename=fasta.fas" #> #> $connection #> [1] "close" #> #> $`transfer-encoding` #> [1] "chunked" #> #> $`content-type` #> [1] "application/x-download" #> #> attr(,"class") #> [1] "insensitive" "list" ``` You can do geographic searches ```r bold_seq(geo = "USA") #> [[1]] #> [[1]]$id #> [1] "GBAN1777-08" #> #> [[1]]$name #> [1] "Macrobdella decora" #> #> [[1]]$gene #> [1] "GBAN1777-08" #> #> [[1]]$sequence #> [1] "---------------------------------ATTGGAATCTTGTATTTCTTATTAGGTACATGATCTGCTATAGTAGGGACCTCTATA---AGAATAATTATTCGAATTGAATTAGCTCAACCTGGGTCGTTTTTAGGAAAT---GATCAAATTTACAATACTATTGTTACTGCTCATGGATTAATTATAATTTTTTTTATAGTAATACCTATTTTAATTGGAGGGTTTGGTAATTGATTAATTCCGCTAATA---ATTGGTTCTCCTGATATAGCTTTTCCACGTCTTAATAATTTAAGATTTTGATTACTTCCGCCATCTTTAACTATACTTTTTTGTTCATCTATAGTCGAAAATGGAGTAGGTACTGGATGGACTATTTACCCTCCTTTAGCAGATAACATTGCTCATTCTGGACCTTCTGTAGATATA---GCAATTTTTTCACTTCATTTAGCTGGTGCTTCTTCTATTTTAGGTTCATTAAATTTTATTACTACTGTAGTTAATATACGATGACCAGGGATATCTATAGAGCGAATTCCTTTATTTATTTGATCCGTAATTATTACTACTGTATTGCTATTATTATCTTTACCAGTATTAGCAGCT---GCTATTTCAATATTATTAACAGATCGTAACTTAAATACTAGATTTTTTGACCCAATAGGAGGAGGGGATCCTATTTTATTCCAACATTTATTTTGATTTTTTGGCCACCCTGAAGTTTATATTTTAATTTTACCAGGATTTGGAGCTATTTCTCATGTAGTAAGTCATAACTCT---AAAAAATTAGAACCGTTTGGATCATTAGGGATATTATATGCAATAATTGGAATTGCAATTTTAGGTTTTATTGTTTGAGCACATCATATATTTACAGTAGGTCTTGATGTAGATACACGAGCTTATTTTACAGCAGCTACAATAGTTATTGCTGTTCCTACAGGAATTAAAGTATTTAGGTGATTG---GCAACT" #> #> #> [[2]] #> [[2]]$id #> [1] "GBAN1780-08" #> #> [[2]]$name #> [1] "Haemopis terrestris" #> #> [[2]]$gene #> [1] "GBAN1780-08" #> #> [[2]]$sequence #> [1] 
"---------------------------------ATTGGAACWTTWTATTTTATTTTNGGNGCTTGATCTGCTATATTNGGGATCTCAATA---AGGAATATTATTCGAATTGAGCCATCTCAACCTGGGAGATTATTAGGAAAT---GATCAATTATATAATTCATTAGTAACAGCTCATGGATTAATTATAATTTTCTTTATGGTTATGCCTATTTTGATTGGTGGGTTTGGTAATTGATTACTACCTTTAATA---ATTGGAGCCCCTGATATAGCTTTTCCTCGATTAAATAATTTAAGTTTTTGATTATTACCACCTTCATTAATTATATTGTTAAGATCCTCTATTATTGAAAGAGGGGTAGGTACAGGTTGAACCTTATATCCTCCTTTAGCAGATAGATTATTTCATTCAGGTCCATCGGTAGATATA---GCTATTTTTTCATTACATATAGCTGGAGCATCATCTATTTTAGGCTCATTAAACTTTATTTCTACAATTATTAATATACGAATTAAAGGTATAAGATCTGATCGAGTACCTTTATTTGTATGATCAGTTGTTATTACAACAGTTCTGTTATTATTGTCTTTACCTGTTTTAGCTGCA---GCTATTACTATATTATTAACAGATCGTAATTTAAATACTACTTTTTTTGATCCTATAGGAGGTGGAGATCCAGTATTGTTTCAACACTTATTTTGATTTTTTGGTCATCCAGAAGTATATATTTTGATTTTACCAGGATTTGGAGCAATTTCTCATATTATTACAAATAATTCT---AAAAAATTGGAACCTTTTGGATCTCTTGGTATAATTTATGCTATAATTGGAATTGCAGTTTTAGGGTTTATTGTATGAGCCCATCATATATTTACTGTAGGATTAGATGTTGATACTCGAGCTTATTTTACAGCAGCTACTATAGTTATTGCTGTTCCTACTGGTATTAAAGTTTTTAGGTGATTA---GCAACA" #> #> #> [[3]] #> [[3]]$id #> [1] "GBNM0293-06" #> #> [[3]]$name #> [1] "Steinernema carpocapsae" #> #> [[3]]$gene #> [1] "GBNM0293-06" #> #> [[3]]$sequence #> [1] 
"---------------------------------------------------------------------------------ACAAGATTATCTCTTATTATTCGTTTAGAGTTGGCTCAACCTGGTCTTCTTTTGGGTAAT---GGTCAATTATATAATTCTATTATTACTGCTCATGCTATTCTTATAATTTTTTTCATAGTTATACCTAGAATAATTGGTGGTTTTGGTAATTGAATATTACCTTTAATATTGGGGGCTCCTGATATAAGTTTTCCACGTTTGAATAATTTAAGTTTTTGATTGCTACCAACTGCTATATTTTTGATTTTAGATTCTTGTTTTGTTGACACTGGTTGTGGTACTAGTTGAACTGTTTATCCTCCTTTGAGG---ACTTTAGGTCACCCTGGYAGAAGTGTAGATTTAGCTATTTTTAGTCTTCATTGTGCAGGAATTAGCTCAATTTTAGGGGCTATTAATTTTATATGTACTACAAAAAATCTTCGTAGTAGTTCTATTTCTTTGGAACATATAAGACTTTTTGTTTGGGCTGTTTTTGTTACTGTTTTTTTATTAGTTTTATCTTTACCTGTTTTAGCTGGTGCTATTACTATGCTTTTAACAGACCGTAATTTAAATACTTCTTTTTTT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------" #> #> #> [[4]] #> [[4]]$id #> [1] "NEONV108-11" #> #> [[4]]$name #> [1] "Aedes thelcter" #> #> [[4]]$gene #> [1] "NEONV108-11" #> #> [[4]]$sequence #> [1] 
"AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGATCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGATCCAATTGGAGGAGGAGACCCAATTTTATACCAACATTTATTT" #> #> #> [[5]] #> [[5]]$id #> [1] "NEONV109-11" #> #> [[5]]$name #> [1] "Aedes thelcter" #> #> [[5]]$gene #> [1] "NEONV109-11" #> #> [[5]]$sequence #> [1] "AACTTTATACTTCATCTTCGGAGTTTGATCAGGAATAGTTGGTACATCATTAAGAATTTTAATTCGTGCTGAATTAAGTCAACCAGGTATATTTATTGGAAATGACCAAATTTATAATGTAATTGTTACAGCTCATGCTTTTATTATAATTTTCTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGACTAGTTCCTCTAATATTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAATAATATAAGTTTTTGAATACTACCTCCCTCATTAACTCTTCTACTTTCAAGTAGTATAGTAGAAAATGGGTCAGGAACAGGATGAACAGTTTATCCACCTCTTTCATCTGGAACTGCTCATGCAGGAGCCTCTGTTGATTTAACTATTTTTTCTCTTCATTTAGCCGGAGTTTCATCAATTTTAGGGGCTGTAAATTTTATTACTACTGTAATTAATATACGATCTGCAGGAATTACTCTTGATCGACTACCTTTATTCGTTTGATCTGTAGTAATTACAGCTGTTTTATTACTTCTTTCACTTCCTGTATTAGCTGGAGCTATTACAATACTATTAACTGATCGAAATTTAAATACATCTTTCTTTGACCCAATTGGAGGGGGAGACCCAATTTTATACCAACATTTATTT" ``` And you can search by researcher name ```r bold_seq(researchers = 'Thibaud Decaens')[[1]] #> $id #> [1] "BGABA657-14" #> #> $name #> [1] "Coleoptera" #> #> $gene #> [1] "BGABA657-14" #> #> $sequence #> [1] 
"ACACTCTATTTCATTTTCGGAGCTTGATCAGGAATAGTAGGAACTTCTTTAAGAATACTAATTCGATCTGAATTGGGAAACCCCGGCTCATTGATTGGGGATGATCAAATTTATAATGTTATTGTAACAGCCCATGCATTCATTATAATTTTTTTTATAGTAATACCGATCATAATAGGAGGTTTTGGAAATTGATTAGTCCCGCTAATATTAGGTGCCCCAGATATAGCATTTCCTCGAATAAATAATATAAGATTTTGACTTCTTCCGCCTTCATTAACTTTACTTATTATAAGAAGAATTGTAGAAAACGGGGCGGGAACAGGATGAACAGTTTACCCACCCCTCTCTTCTAACATTGCTCATAGAGGAGCCTCTGTAGATCTTGCAATTTTTAGATTACATTTAGCCGGTGTATCATCAATTTTAGGTGCAGTTAATTTTATTACAACTATTATTAATATACGACCTAAAGGAATAACATTTGATCGCATACCTTTATTTGTATGAGCTGTAGCTTTAACTGCATTACTTTTATTATTATCTTTACCAGTATTAGCAGGTGCAATTACAATACTTTTAACTGATCGA---------------------------------------" ``` by taxon IDs ```r bold_seq(ids = c('ACRJP618-11', 'ACRJP619-11')) #> [[1]] #> [[1]]$id #> [1] "ACRJP618-11" #> #> [[1]]$name #> [1] "Lepidoptera" #> #> [[1]]$gene #> [1] "ACRJP618-11" #> #> [[1]]$sequence #> [1] "------------------------TTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT" #> #> #> [[2]] #> [[2]]$id #> [1] "ACRJP619-11" #> #> [[2]]$name #> [1] "Lepidoptera" #> #> [[2]]$gene #> [1] "ACRJP619-11" #> #> [[2]]$sequence #> [1] 
"AACTTTATATTTTATTTTTGGTATTTGAGCAGGCATAGTAGGAACTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT" ``` by container (containers include project codes and dataset codes) ```r bold_seq(container = 'ACRJP')[[1]] #> $id #> [1] "ACRJP003-09" #> #> $name #> [1] "Lepidoptera" #> #> $gene #> [1] "ACRJP003-09" #> #> $sequence #> [1] "AACATTATATTTTATTTTTGGGATCTGATCTGGAATAGTAGGGACATCTTTAAGTATACTAATTCGAATAGAACTAGGAAATCCTGGATGTTTAATTGGGGATGATCAAATTTATAATACTATTGTTACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCCATTATAATTGGAGGTTTTGGCAATTGACTTGTACCATTAATATTAGGAGCCCCTGATATAGCATTTCCCCGAATAAATAATATAAGATTTTGACTTCTTCCCCCCTCATTAATTTTATTAATTTCAAGAAGAATTGTTGAAAATGGAGCAGGAACAGGATGAACAGTCTATCCTCCATTATCTTCTAATATTGCGCATAGAGGATCCTCTGTTGATTTAGCTATTTTCTCACTTCATTTAGCAGGAATTTCTTCTATTTTAGGAGCAATTAATTTTATTACAACTATTATTAATATACGAATAAATAATTTACTTTTTGACCAAATACCTCTATTTGTTTGAGCAGTAGGTATTACAGCTGTTCTTCTTTTATTATCATTACCAGTATTAGCAGGAGCAATTACCATACTATTAACAGATCGTAATTTAAATACTTCTTTCTTTGATCCTGCTGGAGGAGGAGATCCAATTTTATACCAACATTTATTT" ``` by bin (a bin is a _Barcode Index Number_) ```r bold_seq(bin = 'BOLD:AAA5125')[[1]] #> $id #> [1] "BLPAB406-06" #> #> $name #> [1] "Eacles ormondei" #> #> $gene #> [1] "BLPAB406-06" #> #> $sequence #> [1] 
"AACTTTATATTTTATTTTTGGAATTTGAGCAGGTATAGTAGGAACTTCTTTAAGATTACTAATTCGAGCAGAATTAGGTACCCCCGGATCTTTAATTGGAGATGACCAAATTTATAATACCATTGTAACAGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGAAATTGATTAGTACCCCTAATACTAGGAGCTCCTGATATAGCTTTCCCCCGAATAAATAATATAAGATTTTGACTATTACCCCCATCTTTAACTCTTTTAATTTCTAGAAGAATTGTCGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCCCTTTCATCTAATATTGCTCATGGAGGCTCTTCTGTTGATTTAGCTATTTTTTCCCTTCATCTAGCTGGAATCTCATCAATTTTAGGAGCTATTAATTTTATCACAACAATCATTAATATACGACTAAATAATATAATATTTGACCAAATACCTTTATTTGTATGAGCTGTTGGTATTACAGCATTTCTTTTATTGTTATCTTTACCTGTACTAGCTGGAGCTATTACTATACTTTTAACAGATCGAAACTTAAATACATCATTTTTTGACCCAGCAGGAGGAGGAGATCCTATTCTCTATCAACATTTATTT" ``` And there are more ways to query, check out the docs for `?bold_seq`. ### Search for specimen data only The BOLD specimen API doesn't give back sequences, only specimen data. By default you download `tsv` format data, which is given back to you as a `data.frame` ```r res <- bold_specimens(taxon = 'Osmia') head(res[,1:8]) #> processid sampleid recordID catalognum fieldnum #> 1 ASGCB261-13 BIOUG07489-F10 3955538 BIOUG07489-F10 #> 2 BCHYM1499-13 BC ZSM HYM 19359 4005348 BC ZSM HYM 19359 BC ZSM HYM 19359 #> 3 BCHYM412-13 BC ZSM HYM 18272 3896353 BC ZSM HYM 18272 BC ZSM HYM 18272 #> 4 BCHYM413-13 BC ZSM HYM 18273 3896354 BC ZSM HYM 18273 BC ZSM HYM 18273 #> 5 FBAPB706-09 BC ZSM HYM 02181 1289067 BC ZSM HYM 02181 BC ZSM HYM 02181 #> 6 FBAPB730-09 BC ZSM HYM 02205 1289091 BC ZSM HYM 02205 BC ZSM HYM 02205 #> institution_storing bin_uri phylum_taxID #> 1 Biodiversity Institute of Ontario BOLD:AAB8874 20 #> 2 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAD6282 20 #> 3 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20 #> 4 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAP2416 20 #> 5 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAE4126 20 #> 6 SNSB, Zoologische Staatssammlung Muenchen BOLD:AAK5820 20 ``` You can optionally get back the data in `XML` format ```r bold_specimens(taxon = 'Osmia', format = 'xml') 
``` ```r 1470124 BOM1525-10 BOLD:AAN3337 DHB 1011 DHB 1011 DHB1011 Marjorie Barrick Museum ``` You can choose to get the `httr` response object back if you'd rather work with the raw data returned from the BOLD API. ```r res <- bold_specimens(taxon = 'Osmia', format = 'xml', response = TRUE) res$url #> [1] "http://v4.boldsystems.org/index.php/API_Public/specimen?taxon=Osmia&format=xml" res$status_code #> [1] 200 res$headers #> NULL ``` ### Search for specimen plus sequence data The specimen/sequence combined API gives back specimen and sequence data. Like the specimen API, this one gives by default `tsv` format data, which is given back to you as a `data.frame`. Here, we're setting `sepfasta=TRUE` so that the sequence data is given back as a list, and taken out of the `data.frame` returned so the `data.frame` is more manageable. ```r res <- bold_seqspec(taxon = 'Osmia', sepfasta = TRUE) res$fasta[1:2] #> $`ASGCB261-13` #> [1] "AATTTTATATATAATTTTTGCTATATGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCTGGTTCATGAATTTCAAATGATCAAACTTATAATTCTTTAGTTACTGCTCATGCTTTTTTAATAATTTTTTTCTTAGTTATACCATTCTTAATTGGGGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATACTTTTATTATTAAGAAATTTTATAAATCCTAGTCCAGGAACTGGATGAACTGTTTATCCACCTTTATCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAACATATTCAATTACCTTTATTTCCTTGATCTGTCTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCAGGTGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCTACAGGAGGAGGAGATCCTATTCTTTATCAACATTTATTT" #> #> $`BCHYM1499-13` #> [1] 
"AATTCTTTACATAATTTTTGCTTTATGATCTGGAATAATTGGGTCAGCAATAAGAATTATTATTCGAATAGAATTAAGTATCCCAGGTTCATGAATTACTAATGATCAAATTTATAATTCTTTAGTAACTGCACATGCTTTTTTAATAATTTTTTTTCTTGTGATACCATTTTTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCTTTCCCACGAATAAACAATATTAGATTTTGATTATTACCGCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCCCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTGCCTTTATTTCCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTGTTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCTACAGGAGGAGGAGATCCAATTCTTTATCAACATTTATTT" ``` Or you can index to a specific sequence like ```r res$fasta['GBAH0293-06'] #> $`GBAH0293-06` #> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------" ``` ### Get trace files This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information. ```r bold_trace(taxon = 'Osmia', quiet = TRUE) ``` bold/README.md0000644000176200001440000002200713134223100012437 0ustar liggesusersbold ==== [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) ![](https://img.shields.io/badge/CRAN/GitHub-0.4.0_/0.4.4.9120-blue.svg) `bold` accesses BOLD barcode data. The Barcode of Life Data Systems (BOLD) is designed to support the generation and application of DNA barcode data. The platform consists of four main modules: a data portal, a database of barcode clusters, an educational portal, and a data collection workbench. This package retrieves data from the BOLD database of barcode clusters, and allows for searching of over 1.7M public records using multiple search criteria including sequence data, specimen data, specimen *plus* sequence data, as well as trace files. [Documentation for the BOLD API](http://v4.boldsystems.org/index.php/api_home). 
## Package status and installation [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/ropensci/bold?branch=master&svg=true)](https://ci.appveyor.com/project/ropensci/bold) [![Travis-CI Build Status](https://travis-ci.org/ropensci/bold.svg?branch=master)](https://travis-ci.org/) [![codecov.io](https://codecov.io/github/ropensci/bold/coverage.svg?branch=master)](https://codecov.io/github/ropensci/bold?branch=master) [![rstudio mirror downloads](http://cranlogs.r-pkg.org/badges/bold)](https://github.com/metacran/cranlogs.app) __Installation instructions__ __Stable Version__ ```r install.packages("bold") ``` __Development Version__ Install `sangerseqR` first ```r source("http://bioconductor.org/biocLite.R") biocLite("sangerseqR") ``` Then `bold` ```r devtools::install_github("ropensci/bold") ``` ## Usage ```r library("bold") ``` ### Search for sequence data only Default is to get a list back ```r bold_seq(taxon='Coelioxys')[[1]] #> $id #> [1] "BBHYL404-10" #> #> $name #> [1] "Coelioxys rufitarsis" #> #> $gene #> [1] "BBHYL404-10" #> #> $sequence #> [1] "TATAATATATATAATTTTTGCAATATGATCAGGTATAATTGGATCATCTTTAAGAATAATTATTCGAATAGAATTAAGAATCCCAGGTTCATGAATTAGAAATGATCAAATTTATAATTCTTTTATTACAGCACATGCATTTTTAATAATTTTTTTTTTAGTTATGCCTTTTCTAATTGGGGGATTTGGTAATTGATTAACACCATTAATACTTGGAGCTCCTGATATAGCTTTCCCCCGAATAAACAATATTAGATTTTGACTACTCCCACCTTCTTTATTACTTTTATTATCAAGAAATTTAATTAATCCAAGACCAGGAACAGGATGAACTGTTTATCCACCATTATCCTCTTATACATATCATCCATCTCCTTCTGTAGATTTAGCAATTTTTTCTTTACATTTATCAGGAATTTCCTCAATTATTGGATCAATAAATTTTATTGTTACAATTTTAATAATAAAAAATTATTCAATAAATTATAATCAAATACCATTATTCCCATGATCAGTTTTAATTACTACAATTTTATTATTACTATCACTTCCAGTATTAGCAGGAGCAATTACAATATTATTATTTGATCGAAATTTAAATTCTTCTTTTTTTGACCCAATAGGAGGAGGAGACCCAATTTTATATCAACATTTATTT\r" ``` You can optionally get back the `httr` response object ```r res <- bold_seq(taxon='Coelioxys', response=TRUE) res$response_headers #> $status #> [1] "HTTP/1.1 200 OK" #> #> $date #> [1] "Thu, 20 Jul 2017 21:51:40 GMT" #> #> $server #> [1] 
"Apache/2.2.15 (Red Hat)" #> #> $`x-powered-by` #> [1] "PHP/5.3.15" #> #> $`content-disposition` #> [1] "attachment; filename=fasta.fas" #> #> $connection #> [1] "close" #> #> $`transfer-encoding` #> [1] "chunked" #> #> $`content-type` #> [1] "application/x-download" ``` ### Search for specimen data only By default you download `tsv` format data, which is given back to you as a `data.frame` ```r res <- bold_specimens(taxon='Osmia') head(res[,1:8]) #> processid sampleid recordID catalognum fieldnum #> 1 ASGCB255-13 BIOUG07489-F04 3955532 BIOUG07489-F04 #> 2 ASGCB258-13 BIOUG07489-F07 3955535 BIOUG07489-F07 #> 3 BBHYA3298-12 BIOUG02688-A06 2711807 BIOUG02688-A06 L#11BIOBUS-2558 #> 4 BBHYL310-10 10BBCHY-3264 1769753 10BBCHY-3264 L#PC2010KT-025 #> 5 BCHYM1496-13 BC ZSM HYM 19356 4005345 BC ZSM HYM 19356 BC ZSM HYM 19356 #> 6 BCHYM412-13 BC ZSM HYM 18272 3896353 BC ZSM HYM 18272 BC ZSM HYM 18272 #> institution_storing collection_code #> 1 Biodiversity Institute of Ontario NA #> 2 Biodiversity Institute of Ontario NA #> 3 University of Guelph, Centre for Biodiversity Genomics NA #> 4 University of Guelph, Centre for Biodiversity Genomics NA #> 5 SNSB, Zoologische Staatssammlung Muenchen NA #> 6 SNSB, Zoologische Staatssammlung Muenchen NA #> bin_uri #> 1 BOLD:ABZ2181 #> 2 BOLD:AAC0884 #> 3 BOLD:ACF5858 #> 4 BOLD:AAC3295 #> 5 BOLD:AAI2010 #> 6 BOLD:AAP2416 ``` ### Search for specimen plus sequence data By default you download `tsv` format data, which is given back to you as a `data.frame` ```r res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE) res$fasta[1:2] #> $`ASGCB255-13` #> [1] 
"-------------------------------GGAATAATTGGTTCTGCTATAAGTATTATTATTCGAATAGAATTAAGAATTCCTGGATCATTCATTTCTAATGATCAAACTTATAATTCTTTAGTAACAGCTCATGCTTTTTTAATAATTTTTTTTCTTGTAATACCATTTTTAATTGGTGGATTTGGAAATTGATTAATTCCATTAATATTAGGAATCCCAGATATAGCATTTCCTCGAATAAATAATATTAGATTTTGACTTTTACCCCCATCCTTAATAATTTTACTTTTAAGAAATTTCTTAAATCCAAGTCCAGGAACAGGTTGAACTGTATATCCCCCCCTTTCTTCTTATTTATTTCATTCTTCCCCTTCTGTTGATTTAGCTATTTTTTCTCTTCATATTTCTGGTTTATCTTCCATCATAGGTTCTTTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCATTAAAACATATTCAATTACCTTTATTTCCTTGATCCGTTTTTATTACAACTATTTTACTATTATTTTCTTTACCTGTTCTAGCAGGAGCTATTACTATATTATTATTTGATCGAAACTTTAATACTTCATTTTTTGATCCAACTGGAGGAGGAGATCCAATTTTATATCAACATTTATTC" #> #> $`ASGCB258-13` #> [1] "GATTTTATATATAATTTTTGCTATGTGATCAGGAATAATTGGTTCAGCAATAAGAATTATTATTCGAATAGAATTAAGAATTCCAGGTTCATGAATCTCTAATGATCAAATTTATAATTCTTTAGTTACTGCTCACGCTTTTTTAATAATTTTTTTTTTAGTAATACCATTTTTAATTGGAGGATTTGGTAATTGATTAGTTCCATTAATATTAGGAATTCCAGATATAGCATTTCCACGAATAAATAATATTAGATTTTGACTTTTACCTCCTTCTTTAATGTTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCAGGAACTGGATGAACTGTATATCCTCCTCTTTCTTCTCATTTATTTCATTCTTCTCCTTCAGTTGATATAGCTATTTTTTCTTTACATATTTCTGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCATTAAAACATATTCAATTGCCTTTATTTCCTTGATCTGTTTTTATTACTACTATTTTATTACTTTTTTCTTTACCTGTTTTAGCTGGAGCAATTACTATATTATTATTTGATCGAAATTTTAATACTTCATTTTTTGATCCGACAGGAGGTGGAGATCCAATTCTTTATCAACATTTATTT" ``` Or you can index to a specific sequence like ```r res$fasta['GBAH0293-06'] #> $`GBAH0293-06` #> [1] 
"------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------" ``` ### Get trace files This function downloads files to your machine - it does not load them into your R session - but prints out where the files are for your information. 
```r x <- bold_trace(ids = 'ACRJP618-11', progress = FALSE) read_trace(x$ab1) #> Number of datapoints: 8877 #> Number of basecalls: 685 #> #> Primary Basecalls: NNNNNNNNNNNNNNNNNNGNNNTTGAGCAGGNATAGTAGGANCTTCTCTTAGTCTTATTATTCGAACAGAATTAGGAAATCCAGGATTTTTAATTGGAGATGATCAAATCTACAATACTATTGTTACGGCTCATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGAGGATTTGGTAATTGATTAGTTCCCCTTATACTAGGAGCCCCAGATATAGCTTTCCCTCGAATAAACAATATAAGTTTTTGGCTTCTTCCCCCTTCACTATTACTTTTAATTTCCAGAAGAATTGTTGAAAATGGAGCTGGAACTGGATGAACAGTTTATCCCCCACTGTCATCTAATATTGCCCATAGAGGTACATCAGTAGATTTAGCTATTTTTTCTTTACATTTAGCAGGTATTTCCTCTATTTTAGGAGCGATTAATTTTATTACTACAATTATTAATATACGAATTAACAGTATAAATTATGATCAAATACCACTATTTGTGTGATCAGTAGGAATTACTGCTTTACTCTTATTACTTTCTCTTCCAGTATTAGCAGGTGCTATCACTATATTATTAACGGATCGAAATTTAAATACATCATTTTTTGATCCTGCAGGAGGAGGAGATCCAATTTTATATCAACATTTATTTTGATTTTTTGGACNTCNNNNAAGTTTAAN #> #> Secondary Basecalls: ``` ## Citation Get citation information for `bold` in R by running: `citation(package = 'bold')` ## Code of Conduct Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. 
[![ropensci_footer](https://ropensci.org/public_images/github_footer.png)](https://ropensci.org) bold/MD50000644000176200001440000000426513134504041011504 0ustar liggesusersd9f0d0f86c57ec088ae6e2ccc75ba3ee *DESCRIPTION c5af52351472a750055a760a8924ce71 *LICENSE 19dc97bc26681640044166206b3e8b26 *NAMESPACE a143b032c7bab4a5641065d39bcde2f2 *NEWS.md bca22309792f9e1bef76921cb712c956 *R/bold-package.R 86af3511a9fb16be45bd4a9aa65ec5bf *R/bold_filter.R 64f2b2e4baf7806e6c1882330e97e948 *R/bold_identify.R f7575812735ace5257d663c64eff23c3 *R/bold_identify_parents.R 7d476be15469458fb3490e5f6751b2a5 *R/bold_seq.R 34757525c62643946a13bbdc88a2c499 *R/bold_seqspec.R c04bfe652c63063579726cb9ac9b72d3 *R/bold_specimens.R fb4a5c2495a64b58d9dbb930953926aa *R/bold_stats.R 5b1e9f8a6f26df5e328a32033c276ed2 *R/bold_tax_id.R cbf8ecafcc3b5d2943be20908afa6b16 *R/bold_tax_name.R 3725c7e5034c9054eba152b29dab6a67 *R/bold_trace.R c71c8a8fa5c0f0d39b74fb77e29783e9 *R/zzz.R d3211d9455722a179f4b474123d260f2 *README.md d24146de7dd680956b18b29d27d96e36 *build/vignette.rds bb64a460c31e2e6821ac53870b09c38e *data/sequences.RData 4e8e66850d376db54b734f6459de4546 *inst/doc/bold_vignette.Rmd 83e40e7100860c0b34b9ad2259c9a05c *inst/doc/bold_vignette.html 1fc13c735a05dc20167294991f30e259 *man/bold-package.Rd 7025c073016140b45f74341a683a2c90 *man/bold_filter.Rd 2fc7738893d734266d23d2f0067b629c *man/bold_identify.Rd bd4d479f09995ce3b37ed0e32a9c5a5a *man/bold_identify_parents.Rd 309ebec7ac23da6ba0bded7c985a71be *man/bold_seq.Rd 6ec8abe164beeb79ca6f2b2de7b31d3c *man/bold_seqspec.Rd bd3feb5a07142dcd69faec4333cb9b41 *man/bold_specimens.Rd ecdb0aa85bd247e984a4ca58b8530c42 *man/bold_stats.Rd 53b9c4a880cfa10f17aeb5e04f644d07 *man/bold_tax_id.Rd ae740ce75ebf5f8118c443e6c1715a76 *man/bold_tax_name.Rd 302ca05fd77797d50f25d36149283649 *man/bold_trace.Rd 8b6eac4da649615fee64522edaf3bf0b *man/sequences.Rd d9066883a8fecb16e80ceeef8323edac *tests/test-all.R acc0cd3d6511852edf1c5b919b824f97 *tests/testthat/test-bold_identify.R 
07443ab368120fb6f9e18e8a00323b9a *tests/testthat/test-bold_seq.R c061b818529a0d00d7dab35fa77fea77 *tests/testthat/test-bold_seqspec.R bf5e0d5ce0c5fd2201c8ccc261f88ca6 *tests/testthat/test-bold_specimens.R 670498c2dfc92d737a1fd81be132f464 *tests/testthat/test-bold_tax_id.R 2a174b1e7e11a116070defb4ac5fb4f4 *tests/testthat/test-bold_tax_name.R 4e8e66850d376db54b734f6459de4546 *vignettes/bold_vignette.Rmd bold/build/0000755000176200001440000000000013134420302012261 5ustar liggesusersbold/build/vignette.rds0000644000176200001440000000031213134420302014614 0ustar liggesusersb```b`fad`b2 1# 'LI/LK-)I MAS RS&);$7M `>DXYsS4楀aM wjey~L6̜T!%ps QY_/ȷ @?{49'ݣ\)%ziE@ w{bold/DESCRIPTION0000644000176200001440000000205513134504041012675 0ustar liggesusersPackage: bold Title: Interface to Bold Systems API Description: A programmatic interface to the Web Service methods provided by Bold Systems () for genetic 'barcode' data. Functions include methods for searching by sequences by taxonomic names, ids, collectors, and institutions; as well as a function for searching for specimens, and downloading trace files. 
Version: 0.5.0 License: MIT + file LICENSE Authors@R: c(person("Scott", "Chamberlain", role = c("aut", "cre"), email = "myrmecocystus@gmail.com")) URL: https://github.com/ropensci/bold BugReports: https://github.com/ropensci/bold/issues VignetteBuilder: knitr LazyData: yes Imports: xml2, crul (>= 0.3.8), stringr, jsonlite, reshape, plyr, data.table, tibble Suggests: roxygen2 (>= 6.0.1), sangerseqR, knitr, testthat RoxygenNote: 6.0.1 NeedsCompilation: no Packaged: 2017-07-21 15:41:54 UTC; sacmac Author: Scott Chamberlain [aut, cre] Maintainer: Scott Chamberlain Repository: CRAN Date/Publication: 2017-07-21 23:02:56 UTC bold/man/0000755000176200001440000000000013134207626011751 5ustar liggesusersbold/man/bold_tax_name.Rd0000644000176200001440000000332713134212756015041 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_tax_name.R \name{bold_tax_name} \alias{bold_tax_name} \title{Search BOLD for taxonomy data by taxonomic name} \usage{ bold_tax_name(name, fuzzy = FALSE, response = FALSE, ...) } \arguments{ \item{name}{(character) One or more scientific names. required.} \item{fuzzy}{(logical) Whether to use fuzzy search or not (default: FALSE).} \item{response}{(logical) Note that response is the object that returns from the Curl call, useful for debugging, and getting detailed info on the API call.} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main purpose being curl debugging} } \description{ Search BOLD for taxonomy data by taxonomic name } \details{ The \code{dataTypes} parameter is not supported in this function. If you want to use that parameter, get an ID from this function and pass it into \code{bold_tax_id}, and then use the \code{dataTypes} parameter. 
} \examples{ \dontrun{ bold_tax_name(name='Diplura') bold_tax_name(name='Osmia') bold_tax_name(name=c('Diplura','Osmia')) bold_tax_name(name=c("Apis","Puma concolor","Pinus concolor")) bold_tax_name(name='Diplur', fuzzy=TRUE) bold_tax_name(name='Osm', fuzzy=TRUE) ## get http response object only bold_tax_name(name='Diplura', response=TRUE) bold_tax_name(name=c('Diplura','Osmia'), response=TRUE) ## Names with no data in BOLD database bold_tax_name("Nasiaeshna pentacantha") bold_tax_name(name = "Cordulegaster erronea") bold_tax_name(name = "Cordulegaster erronea", response=TRUE) ## curl debugging bold_tax_name(name='Diplura', verbose = TRUE) } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy} } \seealso{ \code{\link{bold_tax_id}} } bold/man/bold_filter.Rd0000644000176200001440000000237413134226557014537 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_filter.R \name{bold_filter} \alias{bold_filter} \title{Filter BOLD specimen + sequence data to the longest or shortest sequence per group.} \usage{ bold_filter(x, by, how = "max") } \arguments{ \item{x}{(data.frame) a data.frame, as returned from \code{\link{bold_seqspec}}. Note that some combinations of parameters in \code{\link{bold_seqspec}} don't return a data.frame. Stops with error message if this is not a data.frame. Required.} \item{by}{(character) the column by which to group. For example, if you want the longest sequence for each unique species name, then pass \strong{species_name}. If the column doesn't exist, the function stops with an error message saying so. Required.} \item{how}{(character) one of "max" or "min", which get used as \code{which.max} or \code{which.min} to get the longest or shortest sequence, respectively. Note that we remove gap/alignment characters (\code{-}).} } \value{ a tibble/data.frame } \description{ Filter BOLD specimen + sequence data (as returned from \code{\link{bold_seqspec}}) to the longest or shortest sequence per group.
} \examples{ \dontrun{ res <- bold_seqspec(taxon='Osmia') maxx <- bold_filter(res, by = "species_name") minn <- bold_filter(res, by = "species_name", how = "min") vapply(maxx$nucleotides, nchar, 1, USE.NAMES = FALSE) vapply(minn$nucleotides, nchar, 1, USE.NAMES = FALSE) } } bold/man/bold_seq.Rd0000644000176200001440000000525113134220504014021 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_seq.R \name{bold_seq} \alias{bold_seq} \title{Search BOLD for sequences.} \usage{ bold_seq(taxon = NULL, ids = NULL, bin = NULL, container = NULL, institutions = NULL, researchers = NULL, geo = NULL, marker = NULL, response = FALSE, ...) } \arguments{ \item{taxon}{(character) Returns all records containing matching taxa. Taxa includes the ranks of phylum, class, order, family, subfamily, genus, and species.} \item{ids}{(character) Returns all records containing matching IDs. IDs include Sample IDs, Process IDs, Museum IDs and Field IDs.} \item{bin}{(character) Returns all records contained in matching BINs. A BIN is defined by a Barcode Index Number URI.} \item{container}{(character) Returns all records contained in matching projects or datasets. Containers include project codes and dataset codes} \item{institutions}{(character) Returns all records stored in matching institutions. Institutions are the Specimen Storing Site.} \item{researchers}{(character) Returns all records containing matching researcher names. Researchers include collectors and specimen identifiers.} \item{geo}{(character) Returns all records collected in matching geographic sites. 
Geographic sites include countries and provinces/states.} \item{marker}{(character) Returns all records containing matching marker codes.} \item{response}{(logical) Note that response is the object that returns from the Curl call, useful for debugging, and getting detailed info on the API call.} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main purpose being curl debugging} } \value{ A list with each element of length 4 with slots for id, name, gene, and sequence. } \description{ Get sequences for a taxonomic name, id, bin, container, institution, researcher, geographic place, or gene. } \examples{ \dontrun{ res <- bold_seq(taxon='Coelioxys') bold_seq(taxon='Aglae') bold_seq(taxon=c('Coelioxys','Osmia')) bold_seq(ids='ACRJP618-11') bold_seq(ids=c('ACRJP618-11','ACRJP619-11')) bold_seq(bin='BOLD:AAA5125') bold_seq(container='ACRJP') bold_seq(researchers='Thibaud Decaens') bold_seq(geo='Ireland') bold_seq(geo=c('Ireland','Denmark')) # Return the http response object for detailed Curl call response details res <- bold_seq(taxon='Coelioxys', response=TRUE) res$url res$status_code res$response_headers ## curl debugging ### You can do many things, including get verbose output on the curl ### call, and set a timeout bold_seq(taxon='Coelioxys', verbose = TRUE)[1:2] # bold_seqspec(taxon='Coelioxys', timeout_ms = 10) } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices} } bold/man/bold_identify_parents.Rd0000644000176200001440000000347213134226557016621 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_identify_parents.R \name{bold_identify_parents} \alias{bold_identify_parents} \title{Add taxonomic parent names to a data.frame} \usage{ bold_identify_parents(x, wide = FALSE) } \arguments{ \item{x}{(data.frame/list) list of data.frames - the output from a call to \code{\link{bold_identify}}, or a single data.frame from that output.
required.} \item{wide}{(logical) output in long or wide format. See Details. Default: \code{FALSE}} } \value{ a list of the same length as the input } \description{ Add taxonomic parent names to a data.frame } \details{ This function gets unique set of taxonomic names from the input data.frame, then queries \code{\link{bold_tax_name}} to get the taxonomic ID, passing it to \code{\link{bold_tax_id}} to get the parent names, then attaches those to the input data. Records in the input data that do not have matches for parent names simply get NA values in the added columns. } \section{wide vs long format}{ When \code{wide = FALSE} you get many rows for each record. Essentially, we \code{cbind} the taxonomic classification onto the one row from the result of \code{\link{bold_identify}}, giving as many rows as there are taxa in the taxonomic classification. When \code{wide = TRUE} you get one row for each record - thus the dimensions of the input data stay the same. For this option, we take just the rows for taxonomic ID and name for each taxon in the taxonomic classification, and name the columns by the taxon rank, so you get \code{phylum} and \code{phylum_id}, and so on. } \examples{ \dontrun{ df <- bold_identify(sequences = sequences$seq2) # long format out <- bold_identify_parents(df) str(out) head(out[[1]]) # wide format out <- bold_identify_parents(df, wide = TRUE) str(out) head(out[[1]]) } } bold/man/sequences.Rd0000644000176200001440000000070513121306447014232 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold-package.R \docType{data} \name{sequences} \alias{sequences} \title{List of 3 nucleotide sequences to use in examples for the \code{\link{bold_identify}} function} \description{ List of 3 nucleotide sequences to use in examples for the \code{\link{bold_identify}} function } \details{ Each sequence is a character string, of lengths 410, 600, and 696. 
} \keyword{data} bold/man/bold_tax_id.Rd0000644000176200001440000000364413134212756014517 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_tax_id.R \name{bold_tax_id} \alias{bold_tax_id} \title{Search BOLD for taxonomy data by BOLD ID.} \usage{ bold_tax_id(id, dataTypes = "basic", includeTree = FALSE, response = FALSE, ...) } \arguments{ \item{id}{(integer) One or more BOLD taxonomic identifiers. required.} \item{dataTypes}{(character) Specifies the datatypes that will be returned. 'all' returns all data. 'basic' returns basic taxon information. 'images' returns specimen images.} \item{includeTree}{(logical) If TRUE (default: FALSE), returns a list containing information for parent taxa as well as the specified taxon.} \item{response}{(logical) Note that response is the object that returns from the Curl call, useful for debugging, and getting detailed info on the API call.} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main purpose being curl debugging} } \description{ Search BOLD for taxonomy data by BOLD ID. 
} \examples{ \dontrun{ bold_tax_id(id=88899) bold_tax_id(id=88899, includeTree=TRUE) bold_tax_id(id=88899, includeTree=TRUE, dataTypes = "stats") bold_tax_id(id=c(88899,125295)) ## dataTypes parameter bold_tax_id(id=88899, dataTypes = "basic") bold_tax_id(id=88899, dataTypes = "stats") bold_tax_id(id=88899, dataTypes = "images") bold_tax_id(id=88899, dataTypes = "geo") bold_tax_id(id=88899, dataTypes = "sequencinglabs") bold_tax_id(id=88899, dataTypes = "depository") bold_tax_id(id=c(88899,125295), dataTypes = "geo") bold_tax_id(id=c(88899,125295), dataTypes = "images") ## Passing in NA bold_tax_id(id = NA) bold_tax_id(id = c(88899,125295,NA)) ## get http response object only bold_tax_id(id=88899, response=TRUE) bold_tax_id(id=c(88899,125295), response=TRUE) ## curl debugging bold_tax_id(id=88899, verbose = TRUE) } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=taxonomy} } \seealso{ \code{\link{bold_tax_name}} } bold/man/bold-package.Rd0000644000176200001440000000263413134212756014556 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold-package.R \docType{package} \name{bold-package} \alias{bold-package} \alias{bold} \title{bold: A programmatic interface to the Barcode of Life data.} \description{ bold: A programmatic interface to the Barcode of Life data. } \section{About}{ This package gives you access to data from BOLD Systems (\url{http://www.boldsystems.org/}) via their API (\url{http://v4.boldsystems.org/index.php/api_home}). } \section{Functions}{ \itemize{ \item \code{\link{bold_specimens}} - Search for specimen data. \item \code{\link{bold_seq}} - Search for and retrieve sequences. \item \code{\link{bold_seqspec}} - Get sequence and specimen data together. \item \code{\link{bold_trace}} - Get trace files - saves to disk. \item \code{\link{read_trace}} - Read trace files into R. \item \code{\link{bold_tax_name}} - Get taxonomic names via input names.
\item \code{\link{bold_tax_id}} - Get taxonomic names via BOLD identifiers. \item \code{\link{bold_identify}} - Search for match given a COI sequence. } Interestingly, they provide xml and tsv format data for the specimen data, while they provide fasta data format for the sequence data. So for the specimen data you can get back raw XML, or a data frame parsed from the tsv data, while for sequence data you get back a list (b/c sequences are quite long and would make a data frame unwieldy). } bold/man/bold_trace.Rd0000644000176200001440000000511613134226557014345 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_trace.R \name{bold_trace} \alias{bold_trace} \alias{read_trace} \title{Get BOLD trace files} \usage{ bold_trace(taxon = NULL, ids = NULL, bin = NULL, container = NULL, institutions = NULL, researchers = NULL, geo = NULL, marker = NULL, dest = NULL, overwrite = TRUE, progress = TRUE, ...) read_trace(x) } \arguments{ \item{taxon}{(character) Returns all records containing matching taxa. Taxa includes the ranks of phylum, class, order, family, subfamily, genus, and species.} \item{ids}{(character) Returns all records containing matching IDs. IDs include Sample IDs, Process IDs, Museum IDs and Field IDs.} \item{bin}{(character) Returns all records contained in matching BINs. A BIN is defined by a Barcode Index Number URI.} \item{container}{(character) Returns all records contained in matching projects or datasets. Containers include project codes and dataset codes} \item{institutions}{(character) Returns all records stored in matching institutions. Institutions are the Specimen Storing Site.} \item{researchers}{(character) Returns all records containing matching researcher names. Researchers include collectors and specimen identifiers.} \item{geo}{(character) Returns all records collected in matching geographic sites. 
Geographic sites include countries and provinces/states.} \item{marker}{(character) Returns all records containing matching marker codes.} \item{dest}{(character) A directory to write the files to} \item{overwrite}{(logical) Overwrite existing directory and file?} \item{progress}{(logical) Print progress or not. NOT AVAILABLE FOR NOW. HOPEFULLY WILL RETURN SOON.} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}} \item{x}{Object to print or read.} } \description{ Get BOLD trace files } \examples{ \dontrun{ # Use a specific destination directory bold_trace(taxon='Bombus', geo='Alaska', dest="~/mytarfiles") # Another example # bold_trace(ids='ACRJP618-11', dest="~/mytarfiles") # bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles") # read file in x <- bold_trace(ids=c('ACRJP618-11','ACRJP619-11'), dest="~/mytarfiles") (res <- read_trace(x$ab1[2])) # The progress dialog is pretty verbose, so quiet=TRUE is a nice touch, # but not by default # Beware, this one takes a while # x <- bold_trace(taxon='Osmia', quiet=TRUE) if (requireNamespace("sangerseqR", quietly = TRUE)) { library("sangerseqR") primarySeq(res) secondarySeq(res) head(traceMatrix(res)) } } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices} } bold/man/bold_specimens.Rd0000644000176200001440000000520513134220504015216 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_specimens.R \name{bold_specimens} \alias{bold_specimens} \title{Search BOLD for specimens.} \usage{ bold_specimens(taxon = NULL, ids = NULL, bin = NULL, container = NULL, institutions = NULL, researchers = NULL, geo = NULL, response = FALSE, format = "tsv", ...) } \arguments{ \item{taxon}{(character) Returns all records containing matching taxa. Taxa include the ranks of phylum, class, order, family, subfamily, genus, and species.} \item{ids}{(character) Returns all records containing matching IDs.
IDs include Sample IDs, Process IDs, Museum IDs and Field IDs.} \item{bin}{(character) Returns all records contained in matching BINs. A BIN is defined by a Barcode Index Number URI.} \item{container}{(character) Returns all records contained in matching projects or datasets. Containers include project codes and dataset codes} \item{institutions}{(character) Returns all records stored in matching institutions. Institutions are the Specimen Storing Site.} \item{researchers}{(character) Returns all records containing matching researcher names. Researchers include collectors and specimen identifiers.} \item{geo}{(character) Returns all records collected in matching geographic sites. Geographic sites include countries and provinces/states.} \item{response}{(logical) Note that response is the object that returns from the Curl call, useful for debugging, and getting detailed info on the API call.} \item{format}{(character) One of xml, json, tsv (default). tsv format gives back a data.frame object. xml gives back parsed XML as an \code{xml_document} object. 'json' (JavaScript Object Notation) and 'dwc' (Darwin Core Archive) are supported in theory, but the JSON can be malformed, so we don't support that here, and the DWC option actually returns TSV.} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main purpose being curl debugging} } \description{ Search BOLD for specimens. 
} \examples{ \dontrun{ bold_specimens(taxon='Osmia') bold_specimens(taxon='Osmia', format='xml') bold_specimens(taxon='Osmia', response=TRUE) res <- bold_specimens(taxon='Osmia', format='xml', response=TRUE) res$url res$status_code res$response_headers # More than 1 can be given for all search parameters bold_specimens(taxon=c('Coelioxys','Osmia')) ## curl debugging ### These examples below take a long time, so you can set a timeout so that ### it stops by X sec head(bold_specimens(taxon='Osmia', verbose = TRUE)) # head(bold_specimens(geo='Costa Rica', timeout_ms = 6)) } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices} } bold/man/bold_seqspec.Rd0000644000176200001440000000650013134220504014672 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_seqspec.R \name{bold_seqspec} \alias{bold_seqspec} \title{Get BOLD specimen + sequence data.} \usage{ bold_seqspec(taxon = NULL, ids = NULL, bin = NULL, container = NULL, institutions = NULL, researchers = NULL, geo = NULL, marker = NULL, response = FALSE, format = "tsv", sepfasta = FALSE, ...) } \arguments{ \item{taxon}{(character) Returns all records containing matching taxa. Taxa includes the ranks of phylum, class, order, family, subfamily, genus, and species.} \item{ids}{(character) Returns all records containing matching IDs. IDs include Sample IDs, Process IDs, Museum IDs and Field IDs.} \item{bin}{(character) Returns all records contained in matching BINs. A BIN is defined by a Barcode Index Number URI.} \item{container}{(character) Returns all records contained in matching projects or datasets. Containers include project codes and dataset codes} \item{institutions}{(character) Returns all records stored in matching institutions. Institutions are the Specimen Storing Site.} \item{researchers}{(character) Returns all records containing matching researcher names. 
Researchers include collectors and specimen identifiers.} \item{geo}{(character) Returns all records collected in matching geographic sites. Geographic sites include countries and provinces/states.} \item{marker}{(character) Returns all records containing matching marker codes. See the Marker section below.} \item{response}{(logical) Note that response is the object that returns from the Curl call, useful for debugging, and getting detailed info on the API call.} \item{format}{(character) One of xml or tsv (default). tsv format gives back a data.frame object. xml gives back parsed XML as an \code{xml_document} object.} \item{sepfasta}{(logical) If \code{TRUE}, the fasta data is separated into a list with names matching the processids from the data frame. Default: \code{FALSE}} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main purpose being curl debugging} } \value{ Either a data.frame, parsed xml, an HTTP response object, or a list with length two (a data.frame w/o nucleotide data, and a list with nucleotide data) } \description{ Get BOLD specimen + sequence data. } \section{Marker}{ Notes from BOLD on the \code{marker} param: "All markers for a specimen matching the search string will be returned. ie. A record with COI-5P and ITS will return sequence data for both markers even if only COI-5P was specified." You will likely end up with data with markers that you did not request - just be sure to filter those out as needed. 
} \examples{ \dontrun{ bold_seqspec(taxon='Osmia') bold_seqspec(taxon='Osmia', format='xml') bold_seqspec(taxon='Osmia', response=TRUE) res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE) res$fasta[1:2] res$fasta['GBAH0293-06'] # records that match a marker name res <- bold_seqspec(taxon="Melanogrammus aeglefinus", marker="COI-5P") # records that match a geographic locality res <- bold_seqspec(taxon="Melanogrammus aeglefinus", geo="Canada") ## curl debugging ### You can do many things, including get verbose output on the curl call, ### and set a timeout head(bold_seqspec(taxon='Osmia', verbose = TRUE)) ## timeout # head(bold_seqspec(taxon='Osmia', timeout_ms = 1)) } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices} } bold/man/bold_stats.Rd0000644000176200001440000000515013134224125014370 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_stats.R \name{bold_stats} \alias{bold_stats} \title{Get BOLD stats} \usage{ bold_stats(taxon = NULL, ids = NULL, bin = NULL, container = NULL, institutions = NULL, researchers = NULL, geo = NULL, dataType = "drill_down", response = FALSE, ...) } \arguments{ \item{taxon}{(character) Returns all records containing matching taxa. Taxa includes the ranks of phylum, class, order, family, subfamily, genus, and species.} \item{ids}{(character) Returns all records containing matching IDs. IDs include Sample IDs, Process IDs, Museum IDs and Field IDs.} \item{bin}{(character) Returns all records contained in matching BINs. A BIN is defined by a Barcode Index Number URI.} \item{container}{(character) Returns all records contained in matching projects or datasets. Containers include project codes and dataset codes} \item{institutions}{(character) Returns all records stored in matching institutions. Institutions are the Specimen Storing Site.} \item{researchers}{(character) Returns all records containing matching researcher names. 
Researchers include collectors and specimen identifiers.} \item{geo}{(character) Returns all records collected in matching geographic sites. Geographic sites include countries and provinces/states.} \item{dataType}{(character) one of "overview" or "drill_down" (default). "drill_down": a detailed summary of information which provides record counts by [BINs, Country, Storing Institution, Species]. "overview": the total counts of [BINs, Countries, Storing Institutions, Orders, Families, Genus, Species]} \item{response}{(logical) Note that response is the object that returns from the Curl call, useful for debugging, and getting detailed info on the API call.} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main purpose being curl debugging} } \description{ Get BOLD stats } \examples{ \dontrun{ x <- bold_stats(taxon='Osmia') x$total_records x$records_with_species_name x$bins x$countries x$depositories x$order x$family x$genus x$species # just get all counts lapply(Filter(is.list, x), "[[", "count") res <- bold_stats(taxon='Osmia', response=TRUE) res$url res$status_code res$response_headers # More than 1 can be given for all search parameters bold_stats(taxon=c('Coelioxys','Osmia')) ## curl debugging ### These examples below take a long time, so you can set a timeout so that ### it stops by X sec bold_stats(taxon='Osmia', verbose = TRUE) # bold_stats(geo='Costa Rica', timeout_ms = 6) } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=webservices} } bold/man/bold_identify.Rd0000644000176200001440000000522013134220504015040 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bold_identify.R \name{bold_identify} \alias{bold_identify} \title{Search for matches to sequences against the BOLD COI database.} \usage{ bold_identify(sequences, db = "COX1", response = FALSE, ...) } \arguments{ \item{sequences}{(character) One or more nucleotide sequences to match against the BOLD database. 
Required.} \item{db}{(character) The database to match against, one of COX1, COX1_SPECIES, COX1_SPECIES_PUBLIC, or COX1_L640bp. See the "db parameter options" section for more information.} \item{response}{(logical) Note that response is the object that returns from the Curl call, useful for debugging, and getting detailed info on the API call.} \item{...}{Further args passed on to \code{\link[crul]{HttpClient}}, main purpose being curl debugging} } \value{ A data.frame with details for each specimen matched. If the request fails, returns \code{NULL}. } \description{ Search for matches to sequences against the BOLD COI database. } \section{db parameter options}{ \itemize{ \item COX1 Every COI barcode record on BOLD with a minimum sequence length of 500bp (warning: unvalidated library and includes records without species level identification). This includes many species represented by only one or two specimens as well as all species with interim taxonomy. This search only returns a list of the nearest matches and does not provide a probability of placement to a taxon. \item COX1_SPECIES Every COI barcode record with a species level identification and a minimum sequence length of 500bp. This includes many species represented by only one or two specimens as well as all species with interim taxonomy. \item COX1_SPECIES_PUBLIC All published COI records from BOLD and GenBank with a minimum sequence length of 500bp. This library is a collection of records from the published projects section of BOLD. \item COX1_L640bp Subset of the Species library with a minimum sequence length of 640bp and containing both public and private records. This library is intended for short sequence identification as it provides maximum overlap with short reads from the barcode region of COI. } } \section{Named outputs}{ To maintain names on the output list of data, make sure to pass in a named list to the \code{sequences} parameter. 
You can, for example, take a list of sequences and use \code{\link{setNames}} to set names. } \examples{ \dontrun{ seq <- sequences$seq1 res <- bold_identify(sequences=seq) head(res[[1]]) head(bold_identify(sequences=seq, db='COX1_SPECIES')[[1]]) } } \references{ \url{http://v4.boldsystems.org/index.php/resources/api?type=idengine} } \seealso{ \code{\link{bold_identify_parents}} } bold/LICENSE0000644000176200001440000000005713034000342012166 0ustar liggesusersYEAR: 2017 COPYRIGHT HOLDER: Scott Chamberlain