solrium/0000755000176200001440000000000013176564220011752 5ustar liggesuserssolrium/inst/0000755000176200001440000000000013176475641012737 5ustar liggesuserssolrium/inst/examples/0000755000176200001440000000000012600402073014531 5ustar liggesuserssolrium/inst/examples/books2_delete.json0000644000176200001440000000024012600402073020141 0ustar liggesusers{ "delete": {"id" : "343334534545"}, "delete": {"id" : "29234928423434"}, "delete": {"id" : "3345345345345"}, "delete": {"id" : "2343454435"} } solrium/inst/examples/add_delete.json0000644000176200001440000000072412600402073017501 0ustar liggesusers{ "add": { "doc": { "id" : "978-0641723445", "cat" : ["book","hardcover"], "name" : "The Lightning Thief", "author" : "Rick Riordan", "series_t" : "Percy Jackson and the Olympians", "sequence_i" : 1, "genre_s" : "fantasy", "inStock" : true, "price" : 12.50, "pages_i" : 384 } }, "delete": { "id" : "456" } } solrium/inst/examples/add_delete.xml0000644000176200001440000000104312600402073017323 0ustar liggesusers 978-0641723445 book,hardcover The Lightning Thief Rick Riordan Percy Jackson and the Olympians 1 fantasy TRUE 12.5 384 456 solrium/inst/examples/schema.xml0000644000176200001440000007071112600402073016521 0ustar liggesusers id solrium/inst/examples/books.xml0000644000176200001440000000347012656437677016431 0ustar liggesusers 978-0641723445 book,hardcover The Lightning Thief Rick Riordan Percy Jackson and the Olympians 1 fantasy TRUE 12.5 384 978-1423103349 book,paperback The Sea of Monsters Rick Riordan Percy Jackson and the Olympians 2 fantasy TRUE 6.5 304 978-1857995879 book,paperback Sophies World : The Greek Philosophers Jostein Gaarder NA 1 fantasy TRUE 3.7 64 978-1933988177 book,paperback Lucene in Action, Second Edition Michael McCandless NA 1 IT TRUE 30.5 475 solrium/inst/examples/books_delete.json0000644000176200001440000000024712600402073020066 0ustar liggesusers{ "delete": {"id" : "978-0641723445"}, "delete": {"id" : "978-1423103349"}, "delete": {"id" : "978-1857995879"}, "delete": {"id" : "978-1933988177"} } solrium/inst/examples/updatecommands_delete.xml0000644000176200001440000000003612600402073021600 0ustar liggesusers345 solrium/inst/examples/books.csv0000644000176200001440000000167712600402073016376 0ustar liggesusersid,cat,name,price,inStock,author,series_t,sequence_i,genre_s 0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy 0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy 055357342X,book,A Storm of Swords,7.99,true,George R.R. 
Martin,"A Song of Ice and Fire",3,fantasy 0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi 0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy 0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi 0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy 0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy 0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy 080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy solrium/inst/examples/updatecommands_add.json0000644000176200001440000000050712600402073021242 0ustar liggesusers{ "add": { "doc": { "id" : "345", "cat" : ["book","hardcover"], "name" : "Cars and bikes", "author" : "Hello world", "series_t" : "A series of books", "sequence_i" : 1, "genre_s" : "science fiction", "inStock" : true, "price" : 12.75, "pages_i" : 3 } } } solrium/inst/examples/books.json0000644000176200001440000000217412600402073016545 0ustar liggesusers[ { "id" : "978-0641723445", "cat" : ["book","hardcover"], "name" : "The Lightning Thief", "author" : "Rick Riordan", "series_t" : "Percy Jackson and the Olympians", "sequence_i" : 1, "genre_s" : "fantasy", "inStock" : true, "price" : 12.50, "pages_i" : 384 } , { "id" : "978-1423103349", "cat" : ["book","paperback"], "name" : "The Sea of Monsters", "author" : "Rick Riordan", "series_t" : "Percy Jackson and the Olympians", "sequence_i" : 2, "genre_s" : "fantasy", "inStock" : true, "price" : 6.49, "pages_i" : 304 } , { "id" : "978-1857995879", "cat" : ["book","paperback"], "name" : "Sophie's World : The Greek Philosophers", "author" : "Jostein Gaarder", "sequence_i" : 1, "genre_s" : "fantasy", "inStock" : true, "price" : 3.07, "pages_i" : 64 } , { "id" : "978-1933988177", "cat" : ["book","paperback"], "name" : "Lucene in Action, Second Edition", "author" : "Michael McCandless", "sequence_i" : 1, "genre_s" : "IT", "inStock" : true, "price" : 30.50, "pages_i" : 475 } ] solrium/inst/examples/updatecommands_delete.json0000644000176200001440000000004012600402073021744 0ustar liggesusers{ "delete": { "id": "345" } } solrium/inst/examples/solrconfig.xml0000644000176200001440000005357212600402073017434 0ustar liggesusers 5.2.1 ${solr.data.dir:} ${solr.lock.type:native} true ${solr.ulog.dir:} ${solr.ulog.numVersionBuckets:65536} ${solr.autoCommit.maxTime:15000} false ${solr.autoSoftCommit.maxTime:-1} 1024 true 20 200 false 2 explicit 10 explicit json true text {!xport} xsort false query text explicit true true false terms *:* solrium/inst/examples/books2.json0000644000176200001440000000212112600402073016617 0ustar liggesusers[ { "id" : "343334534545", "cat" : ["book","hardcover"], "name" : "Bears, lions", "author" : "Foo bar", "series_t" : "Percy Jackson and the Olympians", "sequence_i" : 1, "genre_s" : "fantasy", "inStock" : true, "price" : 12.50, "pages_i" : 384 } , { "id" : "29234928423434", "cat" : ["book","paperback"], "name" : "The Sea of Monsters", "author" : "Rick Bick", "series_t" : "Stuff and things", "sequence_i" : 2, "genre_s" : "fantasy", "inStock" : true, "price" : 3.49, "pages_i" : 404 } , { "id" : "3345345345345", "cat" : ["book","paperback"], "name" : "Sophie's World : The Roman Philosophers", "author" : "Jill Brown", "sequence_i" : 1, "genre_s" : "fantasy", "inStock" : true, "price" : 4.07, "pages_i" : 64 } , { "id" : "2343454435", "cat" : ["book","paperback"], "name" : "Lucene in Action, 
Third Edition", "author" : "Michael McCandless", "sequence_i" : 1, "genre_s" : "IT", "inStock" : true, "price" : 34.50, "pages_i" : 375 } ] solrium/inst/examples/books2_delete.xml0000644000176200001440000000016012600402073017771 0ustar liggesusers 343334534545 29234928423434 3345345345345 2343454435 solrium/inst/examples/updatecommands_add.xml0000644000176200001440000000064312600402073021072 0ustar liggesusers 05991 "Cars and bikes" "Hello world" "A series of books" 1 "science fiction" true 12.75 3 solrium/inst/examples/books_delete.xml0000644000176200001440000000016712600402073017716 0ustar liggesusers 978-0641723445 978-1423103349 978-1857995879 978-1933988177 solrium/inst/doc/0000755000176200001440000000000013176475640013503 5ustar liggesuserssolrium/inst/doc/cores_collections.Rmd0000644000176200001440000000365113055337615017660 0ustar liggesusers Cores/collections management ============================ ## Installation Stable version from CRAN ```r install.packages("solrium") ``` Or the development version from GitHub ```r install.packages("devtools") devtools::install_github("ropensci/solrium") ``` Load ```r library("solrium") ``` Initialize connection ```r solr_connect() ``` ``` #> #> url: http://localhost:8983 #> errors: simple #> verbose: TRUE #> proxy: ``` ## Cores There are many operations you can do on cores, including: * `core_create()` - create a core * `core_exists()` - check if a core exists * `core_mergeindexes()` - merge indexes * `core_reload()` - reload a core * `core_rename()` - rename a core * `core_requeststatus()` - check request status * `core_split()` - split a core * `core_status()` - check core status * `core_swap()` - core swap * `core_unload()` - delete a core ### Create a core ```r core_create() ``` ### Delete a core ```r core_unload() ``` ## Collections There are many operations you can do on collections, including: * `collection_addreplica()` * `collection_addreplicaprop()` * `collection_addrole()` * `collection_balanceshardunique()` * `collection_clusterprop()` * `collection_clusterstatus()` * `collection_create()` * `collection_createalias()` * `collection_createshard()` * `collection_delete()` * `collection_deletealias()` * `collection_deletereplica()` * `collection_deletereplicaprop()` * `collection_deleteshard()` * `collection_list()` * `collection_migrate()` * `collection_overseerstatus()` * `collection_rebalanceleaders()` * `collection_reload()` * `collection_removerole()` * `collection_requeststatus()` * `collection_splitshard()` ### Create a collection ```r collection_create() ``` ### Delete a collection ```r collection_delete() ``` solrium/inst/doc/document_management.Rmd0000644000176200001440000001256713055337615020167 0ustar liggesusers Document management =================== ## Installation Stable version from CRAN ```r install.packages("solrium") ``` Or the development version from GitHub ```r install.packages("devtools") devtools::install_github("ropensci/solrium") ``` Load ```r library("solrium") ``` Initialize connection. By default, you connect to `http://localhost:8983` ```r solr_connect() ``` ``` #> #> url: http://localhost:8983 #> errors: simple #> verbose: TRUE #> proxy: ``` ## Create documents from R objects For now, only lists and data.frame's supported. 
### data.frame ```r df <- data.frame(id = c(67, 68), price = c(1000, 500000000)) add(df, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 112 ``` ### list ```r ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) add(ss, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 16 ``` ## Delete documents ### By id Add some documents first ```r docs <- list(list(id = 1, price = 100, name = "brown"), list(id = 2, price = 500, name = "blue"), list(id = 3, price = 2000L, name = "pink")) add(docs, "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 18 ``` And the documents are now in your Solr database ```r tail(solr_search(name = "gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [3 x 4] #> #> id price name _version_ #> (chr) (int) (chr) (dbl) #> 1 1 100 brown 1.525729e+18 #> 2 2 500 blue 1.525729e+18 #> 3 3 2000 pink 1.525729e+18 ``` Now delete those documents just added ```r delete_by_id(ids = c(1, 2, 3), "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 24 ``` And now they are gone ```r tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [0 x 0] ``` ### By query Add some documents first ```r add(docs, "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 19 ``` And the documents are now in your Solr database ```r tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [3 x 4] #> #> id price name _version_ #> (chr) (int) (chr) (dbl) #> 1 1 100 brown 1.525729e+18 #> 2 2 500 blue 1.525729e+18 #> 3 3 2000 pink 1.525729e+18 ``` Now delete those documents just added ```r delete_by_query(query = "(name:blue OR name:pink)", "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 12 ``` And now they are gone ```r tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [1 x 4] #> #> id price name _version_ #> (chr) (int) (chr) (dbl) #> 1 1 100 brown 1.525729e+18 ``` ## Update documents from files This approach is best if you have many different things you want to do at once, e.g., delete and add files and set any additional options. The functions are: * `update_xml()` * `update_json()` * `update_csv()` There are separate functions for each of the data types as they take slightly different parameters - and to make it more clear that those are the three input options for data types. 
### JSON ```r file <- system.file("examples", "books.json", package = "solrium") update_json(file, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 39 ``` ### Add and delete in the same file Add a document first, that we can later delete ```r ss <- list(list(id = 456, name = "cat")) add(ss, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 19 ``` Now add a new document, and delete the one we just made ```r file <- system.file("examples", "add_delete.xml", package = "solrium") cat(readLines(file), sep = "\n") ``` ``` #> #> #> #> 978-0641723445 #> book,hardcover #> The Lightning Thief #> Rick Riordan #> Percy Jackson and the Olympians #> 1 #> fantasy #> TRUE #> 12.5 #> 384 #> #> #> #> 456 #> #> ``` ```r update_xml(file, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 23 ``` ### Notes Note that `update_xml()` and `update_json()` have exactly the same parameters, but simply use different data input formats. `update_csv()` is different in that you can't provide document or field level boosts or other modifications. In addition `update_csv()` can accept not just csv, but tsv and other types of separators. solrium/inst/doc/local_setup.html0000644000176200001440000005000413176475640016702 0ustar liggesusers Local Solr setup

Local Solr setup

OSX

Based on http://lucene.apache.org/solr/quickstart.html

  1. Download most recent version from an Apache mirror http://www.apache.org/dyn/closer.cgi/lucene/solr/5.4.1
  2. Unzip/untar the file. Move to your desired location. Now you have Solr v.5.4.1
  3. Go into the directory you just created: cd solr-5.4.1
  4. Launch Solr: bin/solr start -e cloud -noprompt - Sets up SolrCloud mode, rather than Standalone mode. As far as I can tell, SolrCloud mode seems more common.
  5. Once Step 4 completes, you can go to http://localhost:8983/solr/ now, which is the admin interface for Solr.
  6. Load some documents: bin/post -c gettingstarted docs/
  7. Once Step 6 is complete (will take a few minutes), navigate in your browser to http://localhost:8983/solr/gettingstarted/select?q=*:*&wt=json and you should see a bunch of documents

Linux

You should be able to use the above instructions for OSX on a Linux machine.

Linuxbrew

Linuxbrew is a port of Mac OS homebrew to linux. Operation is essentially the same as for homebrew. Follow the installation instructions for linuxbrew and then the instructions for using homebrew (above) should work without modification.

Windows

You should be able to use the above instructions for OSX on a Windows machine, but with some slight differences. For example, the bin/post tool for OSX and Linux doesn't work on Windows, but see https://cwiki.apache.org/confluence/display/solr/Post+Tool#PostTool-Windows for an equivalent.
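
On any platform you can also skip bin/post and load documents from R instead. A minimal sketch using solrium and one of the example files bundled with the package (this assumes a collection named gettingstarted already exists, as created in the setup steps above):

library("solrium")
solr_connect('http://localhost:8983')
file <- system.file("examples", "books.json", package = "solrium")
update_json(file, "gettingstarted")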

solrium usage

And we can now use the solrium R package to query the Solr database to get raw JSON data:

solr_connect('http://localhost:8983')
solr_search("gettingstarted", q = '*:*', raw = TRUE, rows = 3)

#> [1] "{\"responseHeader\":{\"status\":0,\"QTime\":8,\"params\":{\"q\":\"*:*\",\"rows\":\"3\",\"wt\":\"json\"}},\"response\":{\"numFound\":3577,\"start\":0,\"maxScore\":1.0,\"docs\":[{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmenter.html\",\"stream_size\":[9016],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmenter.html\"],\"title\":[\"Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023127863296},{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmentsBuilder.html\",\"stream_size\":[10336],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmentsBuilder.html\"],\"title\":[\"Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023153029120},{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/internal/csv/CSVParser.html\",\"stream_size\":[32427],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"CSVParser (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/internal/csv/CSVParser.html\"],\"title\":[\"CSVParser (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023221186560}]}}\n"
#> attr(,"class")
#> [1] "sr_search"
#> attr(,"wt")
#> [1] "json"

Or get parsed data back as a data.frame (just looking at a few columns for brevity):

solr_search("gettingstarted", q = '*:*', fl = c('date', 'title'))

#> Source: local data frame [10 x 2]
#>
#>                    date                                                                         title
#> 1  2015-06-10T00:00:00Z   Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API)
#> 2  2015-06-10T00:00:00Z Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API)
#> 3  2015-06-10T00:00:00Z                                                    CSVParser (Solr 5.2.1 API)
#> 4  2015-06-10T00:00:00Z                                                     CSVUtils (Solr 5.2.1 API)
#> 5  2015-06-10T00:00:00Z                                 org.apache.solr.internal.csv (Solr 5.2.1 API)
#> 6  2015-06-10T00:00:00Z                 org.apache.solr.internal.csv Class Hierarchy (Solr 5.2.1 API)
#> 7  2015-06-10T00:00:00Z       Uses of Class org.apache.solr.internal.csv.CSVStrategy (Solr 5.2.1 API)
#> 8  2015-06-10T00:00:00Z          Uses of Class org.apache.solr.internal.csv.CSVUtils (Solr 5.2.1 API)
#> 9  2015-06-10T00:00:00Z                                                    CSVConfig (Solr 5.2.1 API)
#> 10 2015-06-10T00:00:00Z                                             CSVConfigGuesser (Solr 5.2.1 API)

See the other vignettes for more thorough examples:

  • Document management
  • Cores/collections management
  • Solr Search

solrium/inst/doc/local_setup.Rmd0000644000176200001440000001303213176457375016465 0ustar liggesusers Local Solr setup ====== ### OSX __Based on http://lucene.apache.org/solr/quickstart.html__ 1. Download most recent version from an Apache mirror http://www.apache.org/dyn/closer.cgi/lucene/solr/5.4.1 2. Unzip/untar the file. Move to your desired location. Now you have Solr `v.5.4.1` 3. Go into the directory you just created: `cd solr-5.4.1` 4. Launch Solr: `bin/solr start -e cloud -noprompt` - Sets up SolrCloud mode, rather than Standalone mode. As far as I can tell, SolrCloud mode seems more common. 5. Once Step 4 completes, you can go to `http://localhost:8983/solr/` now, which is the admin interface for Solr. 6. Load some documents: `bin/post -c gettingstarted docs/` 7. Once Step 6 is complete (will take a few minutes), navigate in your browser to `http://localhost:8983/solr/gettingstarted/select?q=*:*&wt=json` and you should see a bunch of documents ### Linux > You should be able to use the above instructions for OSX on a Linux machine. #### Linuxbrew [Linuxbrew](http://linuxbrew.sh/) is a port of Mac OS homebrew to linux. Operation is essentially the same as for homebrew. Follow the installation instructions for linuxbrew and then the instructions for using homebrew (above) should work without modification. ### Windows You should be able to use the above instructions for OSX on a Windows machine, but with some slight differences. For example, the `bin/post` tool for OSX and Linux doesn't work on Windows, but see https://cwiki.apache.org/confluence/display/solr/Post+Tool#PostTool-Windows for an equivalent. ### `solrium` usage And we can now use the `solrium` R package to query the Solr database to get raw JSON data: ```r solr_connect('http://localhost:8983') solr_search("gettingstarted", q = '*:*', raw = TRUE, rows = 3) #> [1] "{\"responseHeader\":{\"status\":0,\"QTime\":8,\"params\":{\"q\":\"*:*\",\"rows\":\"3\",\"wt\":\"json\"}},\"response\":{\"numFound\":3577,\"start\":0,\"maxScore\":1.0,\"docs\":[{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmenter.html\",\"stream_size\":[9016],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmenter.html\"],\"title\":[\"Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023127863296},{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmentsBuilder.html\",\"stream_size\":[10336],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmentsBuilder.html\"],\"title\":[\"Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 
API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023153029120},{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/internal/csv/CSVParser.html\",\"stream_size\":[32427],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"CSVParser (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/internal/csv/CSVParser.html\"],\"title\":[\"CSVParser (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023221186560}]}}\n" #> attr(,"class") #> [1] "sr_search" #> attr(,"wt") #> [1] "json" ``` Or parsed data to a data.frame (just looking at a few columns for brevity): ```r solr_search("gettingstarted", q = '*:*', fl = c('date', 'title')) #> Source: local data frame [10 x 2] #> #> date title #> 1 2015-06-10T00:00:00Z Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API) #> 2 2015-06-10T00:00:00Z Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API) #> 3 2015-06-10T00:00:00Z CSVParser (Solr 5.2.1 API) #> 4 2015-06-10T00:00:00Z CSVUtils (Solr 5.2.1 API) #> 5 2015-06-10T00:00:00Z org.apache.solr.internal.csv (Solr 5.2.1 API) #> 6 2015-06-10T00:00:00Z org.apache.solr.internal.csv Class Hierarchy (Solr 5.2.1 API) #> 7 2015-06-10T00:00:00Z Uses of Class org.apache.solr.internal.csv.CSVStrategy (Solr 5.2.1 API) #> 8 2015-06-10T00:00:00Z Uses of Class org.apache.solr.internal.csv.CSVUtils (Solr 5.2.1 API) #> 9 2015-06-10T00:00:00Z CSVConfig (Solr 5.2.1 API) #> 10 2015-06-10T00:00:00Z CSVConfigGuesser (Solr 5.2.1 API) ``` See the other vignettes for more thorough examples: * `Document management` * `Cores/collections management` * `Solr Search` solrium/inst/doc/search.Rmd0000644000176200001440000005047613055337615015423 0ustar liggesusers Solr search =========== **A general purpose R interface to [Apache Solr](http://lucene.apache.org/solr/)** ## Solr info + [Solr home page](http://lucene.apache.org/solr/) + [Highlighting help](http://wiki.apache.org/solr/HighlightingParameters) + [Faceting help](http://wiki.apache.org/solr/SimpleFacetParameters) + [Install and Setup SOLR in OSX, including running Solr](http://risnandar.wordpress.com/2013/09/08/how-to-install-and-setup-apache-lucene-solr-in-osx/) ## Installation Stable version from CRAN ```r install.packages("solrium") ``` Or the development version from GitHub ```r install.packages("devtools") devtools::install_github("ropensci/solrium") ``` Load ```r library("solrium") ``` ## Setup connection You can setup for a remote Solr instance or on your local machine. ```r solr_connect('http://api.plos.org/search') #> #> url: http://api.plos.org/search #> errors: simple #> verbose: TRUE #> proxy: ``` ## Rundown `solr_search()` only returns the `docs` element of a Solr response body. If `docs` is all you need, then this function will do the job. If you need facet data only, or mlt data only, see the appropriate functions for each of those below. Another function, `solr_all()` has a similar interface in terms of parameter as `solr_search()`, but returns all parts of the response body, including, facets, mlt, groups, stats, etc. as long as you request those. ## Search docs `solr_search()` returns only docs. 
A basic search: ```r solr_search(q = '*:*', rows = 2, fl = 'id') #> Source: local data frame [2 x 1] #> #> id #> (chr) #> 1 10.1371/journal.pone.0142243/references #> 2 10.1371/journal.pone.0142243/body ``` __Search in specific fields with `:`__ Search for word ecology in title and cell in the body ```r solr_search(q = 'title:"ecology" AND body:"cell"', fl = 'title', rows = 5) #> Source: local data frame [5 x 1] #> #> title #> (chr) #> 1 The Ecology of Collective Behavior #> 2 Ecology's Big, Hot Idea #> 3 Spatial Ecology of Bacteria at the Microscale in Soil #> 4 Biofilm Formation As a Response to Ecological Competition #> 5 Ecology of Root Colonizing Massilia (Oxalobacteraceae) ``` __Wildcards__ Search for word that starts with "cell" in the title field ```r solr_search(q = 'title:"cell*"', fl = 'title', rows = 5) #> Source: local data frame [5 x 1] #> #> title #> (chr) #> 1 Tumor Cell Recognition Efficiency by T Cells #> 2 Cancer Stem Cell-Like Side Population Cells in Clear Cell Renal Cell Carcin #> 3 Dcas Supports Cell Polarization and Cell-Cell Adhesion Complexes in Develop #> 4 Cell-Cell Contact Preserves Cell Viability via Plakoglobin #> 5 MS4a4B, a CD20 Homologue in T Cells, Inhibits T Cell Propagation by Modulat ``` __Proximity search__ Search for words "sports" and "alcohol" within four words of each other ```r solr_search(q = 'everything:"stem cell"~7', fl = 'title', rows = 3) #> Source: local data frame [3 x 1] #> #> title #> (chr) #> 1 Correction: Reduced Intensity Conditioning, Combined Transplantation of Hap #> 2 A Recipe for Self-Renewing Brain #> 3 Gene Expression Profile Created for Mouse Stem Cells and Developing Embryo ``` __Range searches__ Search for articles with Twitter count between 5 and 10 ```r solr_search(q = '*:*', fl = c('alm_twitterCount', 'id'), fq = 'alm_twitterCount:[5 TO 50]', rows = 10) #> Source: local data frame [10 x 2] #> #> id alm_twitterCount #> (chr) (int) #> 1 10.1371/journal.ppat.1005403/introduction 6 #> 2 10.1371/journal.ppat.1005403/results_and_discussion 6 #> 3 10.1371/journal.ppat.1005403/materials_and_methods 6 #> 4 10.1371/journal.ppat.1005403/supporting_information 6 #> 5 10.1371/journal.ppat.1005401 6 #> 6 10.1371/journal.ppat.1005401/title 6 #> 7 10.1371/journal.ppat.1005401/abstract 6 #> 8 10.1371/journal.ppat.1005401/references 6 #> 9 10.1371/journal.ppat.1005401/body 6 #> 10 10.1371/journal.ppat.1005401/introduction 6 ``` __Boosts__ Assign higher boost to title matches than to body matches (compare the two calls) ```r solr_search(q = 'title:"cell" abstract:"science"', fl = 'title', rows = 3) #> Source: local data frame [3 x 1] #> #> title #> (chr) #> 1 I Want More and Better Cells! – An Outreach Project about Stem Cells and It #> 2 Centre of the Cell: Science Comes to Life #> 3 Globalization of Stem Cell Science: An Examination of Current and Past Coll ``` ```r solr_search(q = 'title:"cell"^1.5 AND abstract:"science"', fl = 'title', rows = 3) #> Source: local data frame [3 x 1] #> #> title #> (chr) #> 1 Centre of the Cell: Science Comes to Life #> 2 I Want More and Better Cells! – An Outreach Project about Stem Cells and It #> 3 Derivation of Hair-Inducing Cell from Human Pluripotent Stem Cells ``` ## Search all `solr_all()` differs from `solr_search()` in that it allows specifying facets, mlt, groups, stats, etc, and returns all of those. It defaults to `parsetype = "list"` and `wt="json"`, whereas `solr_search()` defaults to `parsetype = "df"` and `wt="csv"`. 
`solr_all()` returns by default a list, whereas `solr_search()` by default returns a data.frame. A basic search, just docs output ```r solr_all(q = '*:*', rows = 2, fl = 'id') #> $response #> $response$numFound #> [1] 1502814 #> #> $response$start #> [1] 0 #> #> $response$docs #> $response$docs[[1]] #> $response$docs[[1]]$id #> [1] "10.1371/journal.pone.0142243/references" #> #> #> $response$docs[[2]] #> $response$docs[[2]]$id #> [1] "10.1371/journal.pone.0142243/body" ``` Get docs, mlt, and stats output ```r solr_all(q = 'ecology', rows = 2, fl = 'id', mlt = 'true', mlt.count = 2, mlt.fl = 'abstract', stats = 'true', stats.field = 'counter_total_all') #> $response #> $response$numFound #> [1] 31467 #> #> $response$start #> [1] 0 #> #> $response$docs #> $response$docs[[1]] #> $response$docs[[1]]$id #> [1] "10.1371/journal.pone.0059813" #> #> #> $response$docs[[2]] #> $response$docs[[2]]$id #> [1] "10.1371/journal.pone.0001248" #> #> #> #> #> $moreLikeThis #> $moreLikeThis$`10.1371/journal.pone.0059813` #> $moreLikeThis$`10.1371/journal.pone.0059813`$numFound #> [1] 152704 #> #> $moreLikeThis$`10.1371/journal.pone.0059813`$start #> [1] 0 #> #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[1]] #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[1]]$id #> [1] "10.1371/journal.pone.0111996" #> #> #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[2]] #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[2]]$id #> [1] "10.1371/journal.pone.0143687" #> #> #> #> #> $moreLikeThis$`10.1371/journal.pone.0001248` #> $moreLikeThis$`10.1371/journal.pone.0001248`$numFound #> [1] 159058 #> #> $moreLikeThis$`10.1371/journal.pone.0001248`$start #> [1] 0 #> #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[1]] #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[1]]$id #> [1] "10.1371/journal.pone.0001275" #> #> #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[2]] #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[2]]$id #> [1] "10.1371/journal.pone.0024192" #> #> #> #> #> #> $stats #> $stats$stats_fields #> $stats$stats_fields$counter_total_all #> $stats$stats_fields$counter_total_all$min #> [1] 16 #> #> $stats$stats_fields$counter_total_all$max #> [1] 367697 #> #> $stats$stats_fields$counter_total_all$count #> [1] 31467 #> #> $stats$stats_fields$counter_total_all$missing #> [1] 0 #> #> $stats$stats_fields$counter_total_all$sum #> [1] 141552408 #> #> $stats$stats_fields$counter_total_all$sumOfSquares #> [1] 3.162032e+12 #> #> $stats$stats_fields$counter_total_all$mean #> [1] 4498.44 #> #> $stats$stats_fields$counter_total_all$stddev #> [1] 8958.45 #> #> $stats$stats_fields$counter_total_all$facets #> named list() ``` ## Facet ```r solr_facet(q = '*:*', facet.field = 'journal', facet.query = c('cell', 'bird')) #> $facet_queries #> term value #> 1 cell 128657 #> 2 bird 13063 #> #> $facet_fields #> $facet_fields$journal #> X1 X2 #> 1 plos one 1233662 #> 2 plos genetics 49285 #> 3 plos pathogens 42817 #> 4 plos computational biology 36373 #> 5 plos neglected tropical diseases 33911 #> 6 plos biology 28745 #> 7 plos medicine 19934 #> 8 plos clinical trials 521 #> 9 plos medicin 9 #> #> #> $facet_pivot #> NULL #> #> $facet_dates #> NULL #> #> $facet_ranges #> NULL ``` ## Highlight ```r solr_highlight(q = 'alcohol', hl.fl = 'abstract', rows = 2) #> $`10.1371/journal.pmed.0040151` #> $`10.1371/journal.pmed.0040151`$abstract #> [1] "Background: Alcohol consumption causes an estimated 4% 
of the global disease burden, prompting" #> #> #> $`10.1371/journal.pone.0027752` #> $`10.1371/journal.pone.0027752`$abstract #> [1] "Background: The negative influences of alcohol on TB management with regard to delays in seeking" ``` ## Stats ```r out <- solr_stats(q = 'ecology', stats.field = c('counter_total_all', 'alm_twitterCount'), stats.facet = c('journal', 'volume')) ``` ```r out$data #> min max count missing sum sumOfSquares #> counter_total_all 16 367697 31467 0 141552408 3.162032e+12 #> alm_twitterCount 0 1756 31467 0 168586 3.267801e+07 #> mean stddev #> counter_total_all 4498.439889 8958.45030 #> alm_twitterCount 5.357549 31.77757 ``` ```r out$facet #> $counter_total_all #> $counter_total_all$volume #> min max count missing sum sumOfSquares mean stddev #> 1 20 166202 887 0 2645927 63864880371 2983.007 7948.200 #> 2 495 103147 105 0 1017325 23587444387 9688.810 11490.287 #> 3 1950 69628 69 0 704216 13763808310 10206.029 9834.333 #> 4 742 13856 9 0 48373 375236903 5374.778 3795.438 #> 5 1871 182622 81 0 1509647 87261688837 18637.617 27185.811 #> 6 1667 117922 482 0 5836186 162503606896 12108.270 13817.754 #> 7 1340 128083 741 0 7714963 188647618509 10411.556 12098.852 #> 8 667 362410 1010 0 9692492 340237069126 9596.527 15653.040 #> 9 103 113220 1539 0 12095764 218958657256 7859.496 8975.188 #> 10 72 243873 2948 0 17699332 327210596846 6003.844 8658.717 #> 11 51 184259 4825 0 24198104 382922818910 5015.151 7363.541 #> 12 16 367697 6360 0 26374352 533183277470 4146.911 8163.790 #> 13 42 287741 6620 0 21003701 612616254755 3172.765 9082.194 #> 14 128 161520 5791 0 11012026 206899109466 1901.576 5667.209 #> volume #> 1 11 #> 2 12 #> 3 13 #> 4 14 #> 5 1 #> 6 2 #> 7 3 #> 8 4 #> 9 5 #> 10 6 #> 11 7 #> 12 8 #> 13 9 #> 14 10 #> #> $counter_total_all$journal #> min max count missing sum sumOfSquares mean stddev #> 1 667 117922 243 0 4074303 1.460258e+11 16766.679 17920.074 #> 2 742 265561 884 0 14006081 5.507548e+11 15843.983 19298.065 #> 3 8463 13797 2 0 22260 2.619796e+08 11130.000 3771.708 #> 4 16 367697 25915 0 96069530 1.943903e+12 3707.101 7827.546 #> 5 915 61956 595 0 4788553 6.579963e+10 8047.988 6774.558 #> 6 548 76290 758 0 6326284 9.168443e+10 8346.021 7167.106 #> 7 268 212048 1239 0 5876481 1.010080e+11 4742.923 7686.101 #> 8 495 287741 580 0 4211717 1.411022e+11 7261.581 13815.867 #> journal #> 1 plos medicine #> 2 plos biology #> 3 plos clinical trials #> 4 plos one #> 5 plos pathogens #> 6 plos genetics #> 7 plos neglected tropical diseases #> 8 plos computational biology #> #> #> $alm_twitterCount #> $alm_twitterCount$volume #> min max count missing sum sumOfSquares mean stddev volume #> 1 0 1756 887 0 12295 4040629 13.861330 66.092178 11 #> 2 0 1045 105 0 6466 1885054 61.580952 119.569402 12 #> 3 0 283 69 0 3478 509732 50.405797 70.128101 13 #> 4 6 274 9 0 647 102391 71.888889 83.575482 14 #> 5 0 42 81 0 176 4996 2.172840 7.594060 1 #> 6 0 74 482 0 628 15812 1.302905 5.583197 2 #> 7 0 48 741 0 652 11036 0.879892 3.760087 3 #> 8 0 239 1010 0 1039 74993 1.028713 8.559485 4 #> 9 0 126 1539 0 1901 90297 1.235218 7.562004 5 #> 10 0 886 2948 0 4357 1245453 1.477951 20.504442 6 #> 11 0 822 4825 0 19646 2037596 4.071710 20.144602 7 #> 12 0 1503 6360 0 35938 6505618 5.650629 31.482092 8 #> 13 0 1539 6620 0 49837 12847207 7.528248 43.408246 9 #> 14 0 863 5791 0 31526 3307198 5.443965 23.271216 10 #> #> $alm_twitterCount$journal #> min max count missing sum sumOfSquares mean stddev #> 1 0 777 243 0 4251 1028595 17.493827 62.79406 #> 2 0 1756 884 0 16405 6088729 18.557692 80.93655 
#> 3 0 3 2 0 3 9 1.500000 2.12132 #> 4 0 1539 25915 0 123409 23521391 4.762068 29.74883 #> 5 0 122 595 0 4265 160581 7.168067 14.79428 #> 6 0 178 758 0 4277 148277 5.642480 12.80605 #> 7 0 886 1239 0 4972 1048908 4.012914 28.82956 #> 8 0 285 580 0 4166 265578 7.182759 20.17431 #> journal #> 1 plos medicine #> 2 plos biology #> 3 plos clinical trials #> 4 plos one #> 5 plos pathogens #> 6 plos genetics #> 7 plos neglected tropical diseases #> 8 plos computational biology ``` ## More like this `solr_mlt` is a function to return similar documents to the one ```r out <- solr_mlt(q = 'title:"ecology" AND body:"cell"', mlt.fl = 'title', mlt.mindf = 1, mlt.mintf = 1, fl = 'counter_total_all', rows = 5) out$docs #> Source: local data frame [5 x 2] #> #> id counter_total_all #> (chr) (int) #> 1 10.1371/journal.pbio.1001805 17081 #> 2 10.1371/journal.pbio.0020440 23882 #> 3 10.1371/journal.pone.0087217 5935 #> 4 10.1371/journal.pbio.1002191 13036 #> 5 10.1371/journal.pone.0040117 4316 ``` ```r out$mlt #> $`10.1371/journal.pbio.1001805` #> id counter_total_all #> 1 10.1371/journal.pone.0082578 2196 #> 2 10.1371/journal.pone.0098876 2448 #> 3 10.1371/journal.pone.0102159 1177 #> 4 10.1371/journal.pcbi.1002652 3102 #> 5 10.1371/journal.pcbi.1003408 6942 #> #> $`10.1371/journal.pbio.0020440` #> id counter_total_all #> 1 10.1371/journal.pone.0102679 3112 #> 2 10.1371/journal.pone.0035964 5571 #> 3 10.1371/journal.pone.0003259 2800 #> 4 10.1371/journal.pntd.0003377 3392 #> 5 10.1371/journal.pone.0068814 7522 #> #> $`10.1371/journal.pone.0087217` #> id counter_total_all #> 1 10.1371/journal.pone.0131665 409 #> 2 10.1371/journal.pcbi.0020092 19604 #> 3 10.1371/journal.pone.0133941 475 #> 4 10.1371/journal.pone.0123774 997 #> 5 10.1371/journal.pone.0140306 322 #> #> $`10.1371/journal.pbio.1002191` #> id counter_total_all #> 1 10.1371/journal.pbio.1002232 1950 #> 2 10.1371/journal.pone.0131700 979 #> 3 10.1371/journal.pone.0070448 1608 #> 4 10.1371/journal.pone.0028737 7481 #> 5 10.1371/journal.pone.0052330 5595 #> #> $`10.1371/journal.pone.0040117` #> id counter_total_all #> 1 10.1371/journal.pone.0069352 2763 #> 2 10.1371/journal.pone.0148280 467 #> 3 10.1371/journal.pone.0035502 4031 #> 4 10.1371/journal.pone.0014065 5764 #> 5 10.1371/journal.pone.0113280 1984 ``` ## Groups `solr_groups()` is a function to return similar documents to the one ```r solr_group(q = 'ecology', group.field = 'journal', group.limit = 1, fl = c('id', 'alm_twitterCount')) #> groupValue numFound start #> 1 plos one 25915 0 #> 2 plos computational biology 580 0 #> 3 plos biology 884 0 #> 4 none 1251 0 #> 5 plos medicine 243 0 #> 6 plos neglected tropical diseases 1239 0 #> 7 plos pathogens 595 0 #> 8 plos genetics 758 0 #> 9 plos clinical trials 2 0 #> id alm_twitterCount #> 1 10.1371/journal.pone.0059813 56 #> 2 10.1371/journal.pcbi.1003594 21 #> 3 10.1371/journal.pbio.1002358 16 #> 4 10.1371/journal.pone.0046671 2 #> 5 10.1371/journal.pmed.1000303 0 #> 6 10.1371/journal.pntd.0002577 2 #> 7 10.1371/journal.ppat.1003372 2 #> 8 10.1371/journal.pgen.1001197 0 #> 9 10.1371/journal.pctr.0020010 0 ``` ## Parsing `solr_parse()` is a general purpose parser function with extension methods for parsing outputs from functions in `solr`. `solr_parse()` is used internally within functions to do parsing after retrieving data from the server. You can optionally get back raw `json`, `xml`, or `csv` with the `raw=TRUE`, and then parse afterwards with `solr_parse()`. 
For example: ```r (out <- solr_highlight(q = 'alcohol', hl.fl = 'abstract', rows = 2, raw = TRUE)) #> [1] "{\"response\":{\"numFound\":20268,\"start\":0,\"docs\":[{},{}]},\"highlighting\":{\"10.1371/journal.pmed.0040151\":{\"abstract\":[\"Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting\"]},\"10.1371/journal.pone.0027752\":{\"abstract\":[\"Background: The negative influences of alcohol on TB management with regard to delays in seeking\"]}}}\n" #> attr(,"class") #> [1] "sr_high" #> attr(,"wt") #> [1] "json" ``` Then parse ```r solr_parse(out, 'df') #> names #> 1 10.1371/journal.pmed.0040151 #> 2 10.1371/journal.pone.0027752 #> abstract #> 1 Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting #> 2 Background: The negative influences of alcohol on TB management with regard to delays in seeking ``` [Please report any issues or bugs](https://github.com/ropensci/solrium/issues). solrium/inst/doc/document_management.html0000644000176200001440000005037713176475640020417 0ustar liggesusers Document management

Document management

Installation

Stable version from CRAN

install.packages("solrium")

Or the development version from GitHub

install.packages("devtools")
devtools::install_github("ropensci/solrium")

Load

library("solrium")

Initialize connection. By default, you connect to http://localhost:8983

solr_connect()
#> <solr_connection>
#>   url:    http://localhost:8983
#>   errors: simple
#>   verbose: TRUE
#>   proxy:

Create documents from R objects

For now, only lists and data.frames are supported.

data.frame

df <- data.frame(id = c(67, 68), price = c(1000, 500000000))
add(df, "books")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 112

list

ss <- list(list(id = 1, price = 100), list(id = 2, price = 500))
add(ss, "books")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 16

Delete documents

By id

Add some documents first

docs <- list(list(id = 1, price = 100, name = "brown"),
             list(id = 2, price = 500, name = "blue"),
             list(id = 3, price = 2000L, name = "pink"))
add(docs, "gettingstarted")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 18

And the documents are now in your Solr database

tail(solr_search(name = "gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100))
#> Source: local data frame [3 x 4]
#> 
#>      id price  name    _version_
#>   (chr) (int) (chr)        (dbl)
#> 1     1   100 brown 1.525729e+18
#> 2     2   500  blue 1.525729e+18
#> 3     3  2000  pink 1.525729e+18

Now delete those documents just added

delete_by_id(ids = c(1, 2, 3), "gettingstarted")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 24

And now they are gone

tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100))
#> Source: local data frame [0 x 0]

By query

Add some documents first

add(docs, "gettingstarted")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 19

And the documents are now in your Solr database

tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100))
#> Source: local data frame [3 x 4]
#> 
#>      id price  name    _version_
#>   (chr) (int) (chr)        (dbl)
#> 1     1   100 brown 1.525729e+18
#> 2     2   500  blue 1.525729e+18
#> 3     3  2000  pink 1.525729e+18

Now delete those documents just added

delete_by_query(query = "(name:blue OR name:pink)", "gettingstarted")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 12

And now they are gone

tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100))
#> Source: local data frame [1 x 4]
#> 
#>      id price  name    _version_
#>   (chr) (int) (chr)        (dbl)
#> 1     1   100 brown 1.525729e+18

Update documents from files

This approach is best if you have many different things you want to do at once, e.g., delete and add files and set any additional options. The functions are:

  • update_xml()
  • update_json()
  • update_csv()

There are separate functions for each data type because they take slightly different parameters, and to make it clear that these are the three supported input formats.

JSON

file <- system.file("examples", "books.json", package = "solrium")
update_json(file, "books")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 39

Add and delete in the same file

Add a document first, which we can delete later

ss <- list(list(id = 456, name = "cat"))
add(ss, "books")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 19

Now add a new document, and delete the one we just made

file <- system.file("examples", "add_delete.xml", package = "solrium")
cat(readLines(file), sep = "\n")
#> <update>
#>  <add>
#>    <doc>
#>      <field name="id">978-0641723445</field>
#>      <field name="cat">book,hardcover</field>
#>      <field name="name">The Lightning Thief</field>
#>      <field name="author">Rick Riordan</field>
#>      <field name="series_t">Percy Jackson and the Olympians</field>
#>      <field name="sequence_i">1</field>
#>      <field name="genre_s">fantasy</field>
#>      <field name="inStock">TRUE</field>
#>      <field name="price">12.5</field>
#>      <field name="pages_i">384</field>
#>    </doc>
#>  </add>
#>  <delete>
#>      <id>456</id>
#>  </delete>
#> </update>
update_xml(file, "books")
#> $responseHeader
#> $responseHeader$status
#> [1] 0
#> 
#> $responseHeader$QTime
#> [1] 23

Notes

Note that update_xml() and update_json() have exactly the same parameters, but simply use different data input formats. update_csv() is different in that you can't provide document- or field-level boosts or other modifications. In addition, update_csv() can accept not just csv, but tsv and other separators.
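
As a minimal sketch of the csv route, using the books.csv file bundled with solrium (mirroring the update_json() call above; separator and other options are left at their defaults here, see ?update_csv):

file <- system.file("examples", "books.csv", package = "solrium")
update_csv(file, "books")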

solrium/inst/doc/search.html0000644000176200001440000011106713176475640015644 0ustar liggesusers Solr search

Solr search

A general purpose R interface to Apache Solr

Solr info

  • Solr home page: http://lucene.apache.org/solr/
  • Highlighting help: http://wiki.apache.org/solr/HighlightingParameters
  • Faceting help: http://wiki.apache.org/solr/SimpleFacetParameters
  • Install and Setup SOLR in OSX, including running Solr: http://risnandar.wordpress.com/2013/09/08/how-to-install-and-setup-apache-lucene-solr-in-osx/

Installation

Stable version from CRAN

install.packages("solrium")

Or the development version from GitHub

install.packages("devtools")
devtools::install_github("ropensci/solrium")

Load

library("solrium")

Setup connection

You can set up a connection to a remote Solr instance or to one running on your local machine.

solr_connect('http://api.plos.org/search')
#> <solr_connection>
#>   url:    http://api.plos.org/search
#>   errors: simple
#>   verbose: TRUE
#>   proxy:
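
If you are instead running Solr on your own machine (see the local setup vignette), the same call simply points at localhost:

solr_connect('http://localhost:8983')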

Rundown

solr_search() only returns the docs element of a Solr response body. If docs is all you need, then this function will do the job. If you need facet data only, or mlt data only, see the appropriate functions for each of those below. Another function, solr_all(), has a similar parameter interface to solr_search(), but returns all parts of the response body, including facets, mlt, groups, stats, etc., as long as you request those.
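
As a quick side-by-side of the two (each is covered in detail in the Search docs and Search all sections below), the same query differs only in how much of the response body comes back:

# docs only, returned as a data.frame by default
solr_search(q = '*:*', rows = 2, fl = 'id')

# the full response body (docs plus any facets, mlt, stats, etc. you request), returned as a list by default
solr_all(q = '*:*', rows = 2, fl = 'id')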

Search docs

solr_search() returns only docs. A basic search:

solr_search(q = '*:*', rows = 2, fl = 'id')
#> Source: local data frame [2 x 1]
#> 
#>                                        id
#>                                     (chr)
#> 1 10.1371/journal.pone.0142243/references
#> 2       10.1371/journal.pone.0142243/body

Search in specific fields with :

Search for the word ecology in the title and the word cell in the body

solr_search(q = 'title:"ecology" AND body:"cell"', fl = 'title', rows = 5)
#> Source: local data frame [5 x 1]
#> 
#>                                                       title
#>                                                       (chr)
#> 1                        The Ecology of Collective Behavior
#> 2                                   Ecology's Big, Hot Idea
#> 3     Spatial Ecology of Bacteria at the Microscale in Soil
#> 4 Biofilm Formation As a Response to Ecological Competition
#> 5    Ecology of Root Colonizing Massilia (Oxalobacteraceae)

Wildcards

Search for words that start with “cell” in the title field

solr_search(q = 'title:"cell*"', fl = 'title', rows = 5)
#> Source: local data frame [5 x 1]
#> 
#>                                                                         title
#>                                                                         (chr)
#> 1                                Tumor Cell Recognition Efficiency by T Cells
#> 2 Cancer Stem Cell-Like Side Population Cells in Clear Cell Renal Cell Carcin
#> 3 Dcas Supports Cell Polarization and Cell-Cell Adhesion Complexes in Develop
#> 4                  Cell-Cell Contact Preserves Cell Viability via Plakoglobin
#> 5 MS4a4B, a CD20 Homologue in T Cells, Inhibits T Cell Propagation by Modulat

Proximity search

Search for the words “stem” and “cell” within seven words of each other

solr_search(q = 'everything:"stem cell"~7', fl = 'title', rows = 3)
#> Source: local data frame [3 x 1]
#> 
#>                                                                         title
#>                                                                         (chr)
#> 1 Correction: Reduced Intensity Conditioning, Combined Transplantation of Hap
#> 2                                            A Recipe for Self-Renewing Brain
#> 3  Gene Expression Profile Created for Mouse Stem Cells and Developing Embryo

Range searches

Search for articles with Twitter count between 5 and 50

solr_search(q = '*:*', fl = c('alm_twitterCount', 'id'), fq = 'alm_twitterCount:[5 TO 50]',
rows = 10)
#> Source: local data frame [10 x 2]
#> 
#>                                                     id alm_twitterCount
#>                                                  (chr)            (int)
#> 1            10.1371/journal.ppat.1005403/introduction                6
#> 2  10.1371/journal.ppat.1005403/results_and_discussion                6
#> 3   10.1371/journal.ppat.1005403/materials_and_methods                6
#> 4  10.1371/journal.ppat.1005403/supporting_information                6
#> 5                         10.1371/journal.ppat.1005401                6
#> 6                   10.1371/journal.ppat.1005401/title                6
#> 7                10.1371/journal.ppat.1005401/abstract                6
#> 8              10.1371/journal.ppat.1005401/references                6
#> 9                    10.1371/journal.ppat.1005401/body                6
#> 10           10.1371/journal.ppat.1005401/introduction                6

Boosts

Assign a higher boost to title matches than to abstract matches (compare the two calls)

solr_search(q = 'title:"cell" abstract:"science"', fl = 'title', rows = 3)
#> Source: local data frame [3 x 1]
#> 
#>                                                                         title
#>                                                                         (chr)
#> 1 I Want More and Better Cells! – An Outreach Project about Stem Cells and It
#> 2                                   Centre of the Cell: Science Comes to Life
#> 3 Globalization of Stem Cell Science: An Examination of Current and Past Coll
solr_search(q = 'title:"cell"^1.5 AND abstract:"science"', fl = 'title', rows = 3)
#> Source: local data frame [3 x 1]
#> 
#>                                                                         title
#>                                                                         (chr)
#> 1                                   Centre of the Cell: Science Comes to Life
#> 2 I Want More and Better Cells! – An Outreach Project about Stem Cells and It
#> 3          Derivation of Hair-Inducing Cell from Human Pluripotent Stem Cells

Search all

solr_all() differs from solr_search() in that it allows specifying facets, mlt, groups, stats, etc., and returns all of those. It defaults to parsetype = "list" and wt="json", so it returns a list by default, whereas solr_search() defaults to parsetype = "df" and wt="csv" and returns a data.frame by default.

A basic search, just docs output

solr_all(q = '*:*', rows = 2, fl = 'id')
#> $response
#> $response$numFound
#> [1] 1502814
#> 
#> $response$start
#> [1] 0
#> 
#> $response$docs
#> $response$docs[[1]]
#> $response$docs[[1]]$id
#> [1] "10.1371/journal.pone.0142243/references"
#> 
#> 
#> $response$docs[[2]]
#> $response$docs[[2]]$id
#> [1] "10.1371/journal.pone.0142243/body"

Get docs, mlt, and stats output

solr_all(q = 'ecology', rows = 2, fl = 'id', mlt = 'true', mlt.count = 2, mlt.fl = 'abstract', stats = 'true', stats.field = 'counter_total_all')
#> $response
#> $response$numFound
#> [1] 31467
#> 
#> $response$start
#> [1] 0
#> 
#> $response$docs
#> $response$docs[[1]]
#> $response$docs[[1]]$id
#> [1] "10.1371/journal.pone.0059813"
#> 
#> 
#> $response$docs[[2]]
#> $response$docs[[2]]$id
#> [1] "10.1371/journal.pone.0001248"
#> 
#> 
#> 
#> 
#> $moreLikeThis
#> $moreLikeThis$`10.1371/journal.pone.0059813`
#> $moreLikeThis$`10.1371/journal.pone.0059813`$numFound
#> [1] 152704
#> 
#> $moreLikeThis$`10.1371/journal.pone.0059813`$start
#> [1] 0
#> 
#> $moreLikeThis$`10.1371/journal.pone.0059813`$docs
#> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[1]]
#> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[1]]$id
#> [1] "10.1371/journal.pone.0111996"
#> 
#> 
#> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[2]]
#> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[2]]$id
#> [1] "10.1371/journal.pone.0143687"
#> 
#> 
#> 
#> 
#> $moreLikeThis$`10.1371/journal.pone.0001248`
#> $moreLikeThis$`10.1371/journal.pone.0001248`$numFound
#> [1] 159058
#> 
#> $moreLikeThis$`10.1371/journal.pone.0001248`$start
#> [1] 0
#> 
#> $moreLikeThis$`10.1371/journal.pone.0001248`$docs
#> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[1]]
#> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[1]]$id
#> [1] "10.1371/journal.pone.0001275"
#> 
#> 
#> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[2]]
#> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[2]]$id
#> [1] "10.1371/journal.pone.0024192"
#> 
#> 
#> 
#> 
#> 
#> $stats
#> $stats$stats_fields
#> $stats$stats_fields$counter_total_all
#> $stats$stats_fields$counter_total_all$min
#> [1] 16
#> 
#> $stats$stats_fields$counter_total_all$max
#> [1] 367697
#> 
#> $stats$stats_fields$counter_total_all$count
#> [1] 31467
#> 
#> $stats$stats_fields$counter_total_all$missing
#> [1] 0
#> 
#> $stats$stats_fields$counter_total_all$sum
#> [1] 141552408
#> 
#> $stats$stats_fields$counter_total_all$sumOfSquares
#> [1] 3.162032e+12
#> 
#> $stats$stats_fields$counter_total_all$mean
#> [1] 4498.44
#> 
#> $stats$stats_fields$counter_total_all$stddev
#> [1] 8958.45
#> 
#> $stats$stats_fields$counter_total_all$facets
#> named list()

Facet

solr_facet(q = '*:*', facet.field = 'journal', facet.query = c('cell', 'bird'))
#> $facet_queries
#>   term  value
#> 1 cell 128657
#> 2 bird  13063
#> 
#> $facet_fields
#> $facet_fields$journal
#>                                 X1      X2
#> 1                         plos one 1233662
#> 2                    plos genetics   49285
#> 3                   plos pathogens   42817
#> 4       plos computational biology   36373
#> 5 plos neglected tropical diseases   33911
#> 6                     plos biology   28745
#> 7                    plos medicine   19934
#> 8             plos clinical trials     521
#> 9                     plos medicin       9
#> 
#> 
#> $facet_pivot
#> NULL
#> 
#> $facet_dates
#> NULL
#> 
#> $facet_ranges
#> NULL

Highlight

solr_highlight(q = 'alcohol', hl.fl = 'abstract', rows = 2)
#> $`10.1371/journal.pmed.0040151`
#> $`10.1371/journal.pmed.0040151`$abstract
#> [1] "Background: <em>Alcohol</em> consumption causes an estimated 4% of the global disease burden, prompting"
#> 
#> 
#> $`10.1371/journal.pone.0027752`
#> $`10.1371/journal.pone.0027752`$abstract
#> [1] "Background: The negative influences of <em>alcohol</em> on TB management with regard to delays in seeking"

Stats

out <- solr_stats(q = 'ecology', stats.field = c('counter_total_all', 'alm_twitterCount'), stats.facet = c('journal', 'volume'))
out$data
#>                   min    max count missing       sum sumOfSquares
#> counter_total_all  16 367697 31467       0 141552408 3.162032e+12
#> alm_twitterCount    0   1756 31467       0    168586 3.267801e+07
#>                          mean     stddev
#> counter_total_all 4498.439889 8958.45030
#> alm_twitterCount     5.357549   31.77757
out$facet
#> $counter_total_all
#> $counter_total_all$volume
#>     min    max count missing      sum sumOfSquares      mean    stddev
#> 1    20 166202   887       0  2645927  63864880371  2983.007  7948.200
#> 2   495 103147   105       0  1017325  23587444387  9688.810 11490.287
#> 3  1950  69628    69       0   704216  13763808310 10206.029  9834.333
#> 4   742  13856     9       0    48373    375236903  5374.778  3795.438
#> 5  1871 182622    81       0  1509647  87261688837 18637.617 27185.811
#> 6  1667 117922   482       0  5836186 162503606896 12108.270 13817.754
#> 7  1340 128083   741       0  7714963 188647618509 10411.556 12098.852
#> 8   667 362410  1010       0  9692492 340237069126  9596.527 15653.040
#> 9   103 113220  1539       0 12095764 218958657256  7859.496  8975.188
#> 10   72 243873  2948       0 17699332 327210596846  6003.844  8658.717
#> 11   51 184259  4825       0 24198104 382922818910  5015.151  7363.541
#> 12   16 367697  6360       0 26374352 533183277470  4146.911  8163.790
#> 13   42 287741  6620       0 21003701 612616254755  3172.765  9082.194
#> 14  128 161520  5791       0 11012026 206899109466  1901.576  5667.209
#>    volume
#> 1      11
#> 2      12
#> 3      13
#> 4      14
#> 5       1
#> 6       2
#> 7       3
#> 8       4
#> 9       5
#> 10      6
#> 11      7
#> 12      8
#> 13      9
#> 14     10
#> 
#> $counter_total_all$journal
#>    min    max count missing      sum sumOfSquares      mean    stddev
#> 1  667 117922   243       0  4074303 1.460258e+11 16766.679 17920.074
#> 2  742 265561   884       0 14006081 5.507548e+11 15843.983 19298.065
#> 3 8463  13797     2       0    22260 2.619796e+08 11130.000  3771.708
#> 4   16 367697 25915       0 96069530 1.943903e+12  3707.101  7827.546
#> 5  915  61956   595       0  4788553 6.579963e+10  8047.988  6774.558
#> 6  548  76290   758       0  6326284 9.168443e+10  8346.021  7167.106
#> 7  268 212048  1239       0  5876481 1.010080e+11  4742.923  7686.101
#> 8  495 287741   580       0  4211717 1.411022e+11  7261.581 13815.867
#>                            journal
#> 1                    plos medicine
#> 2                     plos biology
#> 3             plos clinical trials
#> 4                         plos one
#> 5                   plos pathogens
#> 6                    plos genetics
#> 7 plos neglected tropical diseases
#> 8       plos computational biology
#> 
#> 
#> $alm_twitterCount
#> $alm_twitterCount$volume
#>    min  max count missing   sum sumOfSquares      mean     stddev volume
#> 1    0 1756   887       0 12295      4040629 13.861330  66.092178     11
#> 2    0 1045   105       0  6466      1885054 61.580952 119.569402     12
#> 3    0  283    69       0  3478       509732 50.405797  70.128101     13
#> 4    6  274     9       0   647       102391 71.888889  83.575482     14
#> 5    0   42    81       0   176         4996  2.172840   7.594060      1
#> 6    0   74   482       0   628        15812  1.302905   5.583197      2
#> 7    0   48   741       0   652        11036  0.879892   3.760087      3
#> 8    0  239  1010       0  1039        74993  1.028713   8.559485      4
#> 9    0  126  1539       0  1901        90297  1.235218   7.562004      5
#> 10   0  886  2948       0  4357      1245453  1.477951  20.504442      6
#> 11   0  822  4825       0 19646      2037596  4.071710  20.144602      7
#> 12   0 1503  6360       0 35938      6505618  5.650629  31.482092      8
#> 13   0 1539  6620       0 49837     12847207  7.528248  43.408246      9
#> 14   0  863  5791       0 31526      3307198  5.443965  23.271216     10
#> 
#> $alm_twitterCount$journal
#>   min  max count missing    sum sumOfSquares      mean   stddev
#> 1   0  777   243       0   4251      1028595 17.493827 62.79406
#> 2   0 1756   884       0  16405      6088729 18.557692 80.93655
#> 3   0    3     2       0      3            9  1.500000  2.12132
#> 4   0 1539 25915       0 123409     23521391  4.762068 29.74883
#> 5   0  122   595       0   4265       160581  7.168067 14.79428
#> 6   0  178   758       0   4277       148277  5.642480 12.80605
#> 7   0  886  1239       0   4972      1048908  4.012914 28.82956
#> 8   0  285   580       0   4166       265578  7.182759 20.17431
#>                            journal
#> 1                    plos medicine
#> 2                     plos biology
#> 3             plos clinical trials
#> 4                         plos one
#> 5                   plos pathogens
#> 6                    plos genetics
#> 7 plos neglected tropical diseases
#> 8       plos computational biology

More like this

solr_mlt() is a function to return documents similar to the ones matched by your search ("more like this")

out <- solr_mlt(q = 'title:"ecology" AND body:"cell"', mlt.fl = 'title', mlt.mindf = 1, mlt.mintf = 1, fl = 'counter_total_all', rows = 5)
out$docs
#> Source: local data frame [5 x 2]
#> 
#>                             id counter_total_all
#>                          (chr)             (int)
#> 1 10.1371/journal.pbio.1001805             17081
#> 2 10.1371/journal.pbio.0020440             23882
#> 3 10.1371/journal.pone.0087217              5935
#> 4 10.1371/journal.pbio.1002191             13036
#> 5 10.1371/journal.pone.0040117              4316
out$mlt
#> $`10.1371/journal.pbio.1001805`
#>                             id counter_total_all
#> 1 10.1371/journal.pone.0082578              2196
#> 2 10.1371/journal.pone.0098876              2448
#> 3 10.1371/journal.pone.0102159              1177
#> 4 10.1371/journal.pcbi.1002652              3102
#> 5 10.1371/journal.pcbi.1003408              6942
#> 
#> $`10.1371/journal.pbio.0020440`
#>                             id counter_total_all
#> 1 10.1371/journal.pone.0102679              3112
#> 2 10.1371/journal.pone.0035964              5571
#> 3 10.1371/journal.pone.0003259              2800
#> 4 10.1371/journal.pntd.0003377              3392
#> 5 10.1371/journal.pone.0068814              7522
#> 
#> $`10.1371/journal.pone.0087217`
#>                             id counter_total_all
#> 1 10.1371/journal.pone.0131665               409
#> 2 10.1371/journal.pcbi.0020092             19604
#> 3 10.1371/journal.pone.0133941               475
#> 4 10.1371/journal.pone.0123774               997
#> 5 10.1371/journal.pone.0140306               322
#> 
#> $`10.1371/journal.pbio.1002191`
#>                             id counter_total_all
#> 1 10.1371/journal.pbio.1002232              1950
#> 2 10.1371/journal.pone.0131700               979
#> 3 10.1371/journal.pone.0070448              1608
#> 4 10.1371/journal.pone.0028737              7481
#> 5 10.1371/journal.pone.0052330              5595
#> 
#> $`10.1371/journal.pone.0040117`
#>                             id counter_total_all
#> 1 10.1371/journal.pone.0069352              2763
#> 2 10.1371/journal.pone.0148280               467
#> 3 10.1371/journal.pone.0035502              4031
#> 4 10.1371/journal.pone.0014065              5764
#> 5 10.1371/journal.pone.0113280              1984

Groups

solr_group() is a function to return search results grouped by a given field; here, results are grouped by journal

solr_group(q = 'ecology', group.field = 'journal', group.limit = 1, fl = c('id', 'alm_twitterCount'))
#>                         groupValue numFound start
#> 1                         plos one    25915     0
#> 2       plos computational biology      580     0
#> 3                     plos biology      884     0
#> 4                             none     1251     0
#> 5                    plos medicine      243     0
#> 6 plos neglected tropical diseases     1239     0
#> 7                   plos pathogens      595     0
#> 8                    plos genetics      758     0
#> 9             plos clinical trials        2     0
#>                             id alm_twitterCount
#> 1 10.1371/journal.pone.0059813               56
#> 2 10.1371/journal.pcbi.1003594               21
#> 3 10.1371/journal.pbio.1002358               16
#> 4 10.1371/journal.pone.0046671                2
#> 5 10.1371/journal.pmed.1000303                0
#> 6 10.1371/journal.pntd.0002577                2
#> 7 10.1371/journal.ppat.1003372                2
#> 8 10.1371/journal.pgen.1001197                0
#> 9 10.1371/journal.pctr.0020010                0

Parsing

solr_parse() is a general purpose parser function with extension methods for parsing outputs from functions in solrium. solr_parse() is used internally within functions to parse data after it is retrieved from the server. You can optionally get back raw JSON, XML, or CSV by setting raw = TRUE, and then parse afterwards with solr_parse().

For example:

(out <- solr_highlight(q = 'alcohol', hl.fl = 'abstract', rows = 2, raw = TRUE))
#> [1] "{\"response\":{\"numFound\":20268,\"start\":0,\"docs\":[{},{}]},\"highlighting\":{\"10.1371/journal.pmed.0040151\":{\"abstract\":[\"Background: <em>Alcohol</em> consumption causes an estimated 4% of the global disease burden, prompting\"]},\"10.1371/journal.pone.0027752\":{\"abstract\":[\"Background: The negative influences of <em>alcohol</em> on TB management with regard to delays in seeking\"]}}}\n"
#> attr(,"class")
#> [1] "sr_high"
#> attr(,"wt")
#> [1] "json"

Then parse

solr_parse(out, 'df')
#>                          names
#> 1 10.1371/journal.pmed.0040151
#> 2 10.1371/journal.pone.0027752
#>                                                                                                    abstract
#> 1   Background: <em>Alcohol</em> consumption causes an estimated 4% of the global disease burden, prompting
#> 2 Background: The negative influences of <em>alcohol</em> on TB management with regard to delays in seeking

Please report any issues or bugs.

solrium/inst/doc/cores_collections.html0000644000176200001440000003745313176475640020116 0ustar liggesusers Cores/collections management

Cores/collections management

Installation

Stable version from CRAN

install.packages("solrium")

Or the development version from GitHub

install.packages("devtools")
devtools::install_github("ropensci/solrium")

Load

library("solrium")

Initialize connection

solr_connect()
#> <solr_connection>
#>   url:    http://localhost:8983
#>   errors: simple
#>   verbose: TRUE
#>   proxy:
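
By default the connection points at a local Solr instance. A sketch of connecting with non-default options (the url, errors, and verbose argument names mirror the fields printed above; adjust the URL for your own server):

solr_connect(url = "http://localhost:8983", errors = "complete", verbose = FALSE)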

Cores

There are many operations you can do on cores, including:

  • core_create() - create a core
  • core_exists() - check if a core exists
  • core_mergeindexes() - merge indexes
  • core_reload() - reload a core
  • core_rename() - rename a core
  • core_requeststatus() - check request status
  • core_split() - split a core
  • core_status() - check core status
  • core_swap() - swap two cores
  • core_unload() - unload (remove) a core

Create a core

core_create()
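
With arguments, for example (a sketch based on this package's tests; instanceDir and configSet must match directories available to your Solr install):

core_create(name = "helloWorld", instanceDir = "helloWorld", configSet = "basic_configs")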

Delete a core

core_unload()
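
Passing the name of the core to remove, for example:

core_unload("helloWorld")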

Collections

There are many operations you can do on collections, including:

  • collection_addreplica()
  • collection_addreplicaprop()
  • collection_addrole()
  • collection_balanceshardunique()
  • collection_clusterprop()
  • collection_clusterstatus()
  • collection_create()
  • collection_createalias()
  • collection_createshard()
  • collection_delete()
  • collection_deletealias()
  • collection_deletereplica()
  • collection_deletereplicaprop()
  • collection_deleteshard()
  • collection_list()
  • collection_migrate()
  • collection_overseerstatus()
  • collection_rebalanceleaders()
  • collection_reload()
  • collection_removerole()
  • collection_requeststatus()
  • collection_splitshard()

Create a collection

collection_create()
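
With a name and shard count, for example (mirroring the package's cloud mode tests):

collection_create(name = "helloWorld", numShards = 2)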

Delete a collection

collection_delete()
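
Passing the name of the collection to remove, for example:

collection_delete(name = "helloWorld")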
solrium/tests/0000755000176200001440000000000013176475641013124 5ustar liggesuserssolrium/tests/standard_mode/0000755000176200001440000000000012600402073015704 5ustar liggesuserssolrium/tests/standard_mode/test-core_create.R0000644000176200001440000000153112655420666021301 0ustar liggesuserscontext("core_create") test_that("core_create works", { solr_connect(verbose = FALSE) core_name <- "slamcore" # delete if exists if (core_exists(core_name)) { invisible(core_unload(core_name)) } # write files in preparation path <- sprintf("~/solr-5.4.1/server/solr/%s/conf", core_name) dir.create(path, recursive = TRUE) files <- list.files("~/solr-5.4.1/server/solr/configsets/data_driven_schema_configs/conf/", full.names = TRUE) invisible(file.copy(files, path, recursive = TRUE)) # create the core aa <- suppressMessages(core_create(name = core_name, instanceDir = core_name, configSet = "basic_configs")) expect_is(aa, "list") expect_is(aa$responseHeader, "list") # it worked expect_equal(aa$responseHeader$status, 0) # correct name expect_is(aa$core, "character") expect_equal(aa$core, core_name) }) solrium/tests/testthat/0000755000176200001440000000000013176475641014764 5ustar liggesuserssolrium/tests/testthat/test-update_atomic_xml.R0000644000176200001440000000314413167507346021562 0ustar liggesuserscontext("update_atomic_xml") library(xml2) test_that("update_atomic_xml works", { skip_on_cran() if (conn$collection_exists("books")) { conn$collection_delete("books") } conn$collection_create("books") # Add documents file <- system.file("examples", "books.xml", package = "solrium") invisible(conn$update_xml(file, "books")) # get a document res1 <- conn$get(ids = '978-0641723445', "books", wt = "xml") res1_genre <- xml2::xml_text( xml2::xml_find_all(res1, '//doc//str[@name="genre_s"]')) res1_pages <- xml2::xml_text( xml2::xml_find_all(res1, '//doc//int[@name="pages_i"]')) # atomic update body <- ' 978-0641723445 mystery 1 ' aa <- conn$update_atomic_xml(body, name="books") # get the document again res2 <- conn$get(ids = '978-0641723445', "books", wt = "xml") res2_genre <- xml2::xml_text( xml2::xml_find_all(res2, '//doc//str[@name="genre_s"]')) res2_pages <- xml2::xml_text( xml2::xml_find_all(res2, '//doc//int[@name="pages_i"]')) expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_is(res1, "xml_document") expect_equal(res1_genre, "fantasy") expect_equal(res1_pages, "384") expect_is(res2, "xml_document") expect_equal(res2_genre, "mystery") expect_equal(res2_pages, "385") }) test_that("update_atomic_xml fails well", { expect_error(update_atomic_xml(), "argument \"conn\" is missing") expect_error(update_atomic_xml(5), "conn must be a SolrClient object") }) solrium/tests/testthat/test-client.R0000644000176200001440000000236313167507346017344 0ustar liggesuserscontext("SolrClient") test_that("SolrClient to remote Solr server works", { skip_on_cran() aa <- SolrClient$new(host = 'api.plos.org', path = 'search', port = NULL) expect_is(aa, "SolrClient") expect_is(aa$host, "character") expect_null(aa$proxy) expect_is(aa$errors, "character") expect_true(all(c('host', 'proxy', 'errors') %in% names(aa))) }) test_that("SolrClient to local Solr server works", { skip_on_cran() bb <- SolrClient$new() expect_is(bb, "SolrClient") expect_is(bb$host, "character") expect_null(bb$proxy) expect_is(bb$errors, "character") expect_true(all(c('host', 'proxy', 'errors') %in% names(bb))) }) test_that("SolrClient works with a proxy", { skip_on_cran() port <- 3128 proxy <- list(url = "187.62.207.130", port = port) cc <- 
SolrClient$new(proxy = proxy) expect_is(cc, "SolrClient") expect_is(cc$host, "character") expect_is(cc$proxy, "proxy") expect_is(cc$proxy$proxy, "character") }) test_that("SolrClient fails well", { skip_on_cran() #expect_error(SolrClient$new(host = "foobar"), "That does not appear to be a url") expect_error(SolrClient$new(errors = 'foo'), "errors must be one of") expect_error(SolrClient$new(proxy = list(foo = "bar")), "proxy URL not") }) solrium/tests/testthat/test-delete.R0000644000176200001440000000310313176461330017311 0ustar liggesuserscontext("delete_by_id") test_that("delete by ", { skip_on_cran() if (!collection_exists(conn, "gettingstarted")) { collection_create(conn, name = "gettingstarted", numShards = 1) } ss <- list(list(id = 1, price = 100), list(id = 2, price = 500), list(id = 3, price = 100), list(id = 4, price = 500)) invisible(add(ss, conn, name = "gettingstarted")) # single id aa <- conn$delete_by_id(ids = 1, "gettingstarted") expect_is(aa, "list") expect_named(aa, c("responseHeader")) # many ids aa <- conn$delete_by_id(ids = c(3, 4), "gettingstarted") expect_is(aa, "list") expect_named(aa, c("responseHeader")) res <- conn$get(ids = 3:4, "gettingstarted") expect_equal(length(res$response$docs), 0) }) context("delete_by_query") test_that("delete by many ids", { skip_on_cran() ss <- list(list(id = 10, title = "adfadsf"), list(id = 12, title = "though"), list(id = 13, title = "cheese"), list(id = 14, title = "animals")) invisible(add(ss, conn, name = "gettingstarted")) aa <- conn$delete_by_query(query = "title:cheese", "gettingstarted") expect_is(aa, "list") expect_named(aa, c("responseHeader")) res <- conn$search("gettingstarted", params = list(q = "title:cheese")) expect_equal(NROW(res), 0) }) test_that("delete fails well", { skip_on_cran() expect_error(delete_by_id(), "argument \"conn\" is missing") expect_error(delete_by_query(), "argument \"conn\" is missing") expect_error(delete_by_id(5), "conn must be a SolrClient object") expect_error(delete_by_query(5), "conn must be a SolrClient object") }) solrium/tests/testthat/helper-solrium.R0000644000176200001440000000157613167524103020053 0ustar liggesusersconn <- SolrClient$new() conn_plos <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL) conn_simp <- SolrClient$new(host = 'api.plos.org', path = 'search', port = NULL) conn_comp <- SolrClient$new(host = 'api.plos.org', path = 'search', port = NULL, errors = "complete") conn_hathi <- SolrClient$new( host = "chinkapin.pti.indiana.edu", path = "solr/meta/select", port = 9994) conn_dc <- SolrClient$new(host = "search.datacite.org", path = "api", port = NULL) conn_dryad <- SolrClient$new(host = "datadryad.org", path = "solr/search/select", port = NULL) # cloud mode: create collection "gettingstarted" up <- tryCatch(conn$collection_exists("gettingstarted"), error = function(e) e) if (!inherits(up, "error")) { if (!conn$collection_exists("gettingstarted")) { conn$collection_create("gettingstarted") } } solrium/tests/testthat/test-solr_get.R0000644000176200001440000000217613176462110017673 0ustar liggesuserscontext("get") test_that("get works with a single id", { skip_on_cran() if (!collection_exists(conn, "gettingstarted")) { collection_create(conn, name = "gettingstarted", numShards = 1) } ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) invisible(add(ss, conn, name = "gettingstarted")) aa <- solr_get(conn, ids = 1, "gettingstarted") expect_is(aa, "list") expect_named(aa, c("response")) expect_named(aa$response, c("numFound", "start", "docs")) 
expect_is(aa$response$docs, "data.frame") aa <- solr_get(conn, ids = c(1, 2), "gettingstarted") expect_is(aa, "list") expect_named(aa, c("response")) expect_equal(NROW(aa$response$docs), 2) aa <- solr_get(conn, ids = "1,2", "gettingstarted") expect_is(aa, "list") expect_named(aa, c("response")) expect_equal(NROW(aa$response$docs), 2) aa <- conn$get(1, "gettingstarted") expect_is(aa, "list") expect_named(aa$response, c("numFound", "start", "docs")) }) test_that("get fails well", { skip_on_cran() expect_error(solr_get(), "argument \"conn\" is missing") expect_error(solr_get(5), "conn must be a SolrClient object") }) solrium/tests/testthat/test-update_xml.R0000644000176200001440000000166213176460462020226 0ustar liggesuserscontext("update_xml") test_that("update_xml works", { skip_on_cran() file <- system.file("examples", "books.xml", package = "solrium") if (!conn$collection_exists("books")) conn$collection_create("books") aa <- conn$update_xml(files = file, name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_true(conn$collection_exists("books")) }) test_that("update_xml works with old format", { skip_on_cran() file <- system.file("examples", "books.xml", package = "solrium") if (!conn$collection_exists("books")) conn$collection_create("books") aa <- update_xml(conn, files = file, name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_true(conn$collection_exists("books")) }) test_that("update_xml fails well", { skip_on_cran() expect_error(update_xml(), "argument \"conn\" is missing") expect_error(update_xml(5), "conn must be a SolrClient object") }) solrium/tests/testthat/test-solr_all.R0000644000176200001440000000611613176460660017672 0ustar liggesuserscontext("solr_all") test_that("solr_all works", { skip_on_cran() a <- conn_plos$all(params = list(q='*:*', rows=2, fl='id')) # correct dimensions expect_equal(length(a), 6) # correct classes expect_is(a, "list") expect_is(a$search, "tbl_df") # right slot names expect_named(a, c('search','facet','high','mlt','group','stats')) }) test_that("solr_all fails well", { skip_on_cran() expect_error(conn_plos$all(params = list(q = "*:*", rows = "asdf")), "rows should be a numeric or integer class value") Sys.sleep(2) expect_error(conn_plos$all(params = list(q = "*:*", sort = "down")), "400 - Can't determine a Sort Order \\(asc or desc\\) in sort spec 'down'") Sys.sleep(2) expect_error(conn_plos$all(params = list(q='*:*', fl=c('alm_twitterCount','id'), fq='alm_notafield:[5 TO 50]', rows=10)), "undefined field") expect_error(conn_plos$all(params = list(q = "*:*", wt = "foobar")), "wt must be one of: json, xml, csv") }) test_that("solr_all works with Datacite", { skip_on_cran() a <- conn_dc$all(params = list(q = '*:*', rows = 2)) b <- conn_dc$all(params = list(q = 'publisher:Data', rows = 5)) # correct dimensions expect_equal(NROW(a$search), 2) expect_equal(NROW(b$search), 5) }) test_that("solr_all old style works", { skip_on_cran() expect_is(solr_all(conn_plos, params = list(q='*:*', rows=2, fl='id')), "list" ) }) test_that("solr_all optimize max rows with lower boundary", { skip_on_cran() a <- conn_plos$all(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', a$search$id) b <- conn_plos$all(params = list(q=query, rows=1, fl='id')) cc <- conn_plos$all(params = list(q=query, rows=-1, fl='id')) expect_identical(b, cc) }) test_that("solr_all optimize max rows with upper boundary", { skip_on_cran() a <- conn_plos$all(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', 
a$search$id) b <- conn_plos$all(params = list(q=query, rows=1, fl='id')) c <- conn_plos$all(params = list(q=query, rows=50000, fl='id')) expect_identical(b, c) }) test_that("solr_all optimize max rows with rows higher than upper boundary", { skip_on_cran() a <- conn_plos$all(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', a$search$id) b <- conn_plos$all(params = list(q=query, rows=1, fl='id')) c <- conn_plos$all(params = list(q=query, rows=50001, fl='id')) expect_identical(b, c) }) test_that("solr_all optimize max rows with rows=31 and minOptimizedRows=30", { skip_on_cran() a <- conn_plos$all(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', a$search$id) b <- conn_plos$all(params = list(q=query, rows=1, fl='id')) c <- conn_plos$all(params = list(q=query, rows=31, fl='id'), optimizeMaxRows=TRUE, minOptimizedRows=30) expect_identical(b, c) }) test_that("solr_all fails if optimize max rows is disabled with rows equal to -1", { skip_on_cran() expect_error( conn_plos$all(params = list(q='*:*', rows=-1, fl='id'), optimizeMaxRows=FALSE), "'rows' parameter cannot be negative" ) }) solrium/tests/testthat/test-schema.R0000644000176200001440000000265213176460671017326 0ustar liggesuserscontext("schema - cloud mode") test_that("both R6 and normal function call work", { skip_on_cran() expect_is(conn$schema, "function") expect_equal(names(formals(schema))[1], "conn") }) test_that("schema works against", { skip_on_cran() skip_if_not(!is_in_cloud_mode(conn)) aa <- conn$schema(name = "gettingstarted") bb <- conn$schema(name = "gettingstarted", what = "fields") expect_is(conn$schema(name = "gettingstarted", "dynamicfields"), "list") expect_is(conn$schema(name = "gettingstarted", "fieldtypes"), "list") expect_is(conn$schema(name = "gettingstarted", "copyfields"), "list") expect_is(conn$schema(name = "gettingstarted", "name"), "list") expect_is(conn$schema(name = "gettingstarted", "version"), "list") expect_is(conn$schema(name = "gettingstarted", "uniquekey"), "list") expect_is(conn$schema(name = "gettingstarted", "similarity"), "list") expect_is(aa, "list") expect_is(aa$responseHeader, "list") expect_is(aa$schema, "list") expect_is(aa$schema$name, "character") expect_is(bb, "list") expect_is(bb$fields, "data.frame") }) test_that("schema fails well", { skip_on_cran() skip_if_not(!is_in_cloud_mode(conn)) expect_error(conn$schema(), "argument \"name\" is missing") expect_error(conn$schema(name = "gettingstarted", "stuff"), "Not Found") }) test_that("schema old style works", { skip_on_cran() expect_is(schema(conn, name = "gettingstarted"), "list" ) }) solrium/tests/testthat/test-solr_highlight.r0000644000176200001440000000204413176460553021126 0ustar liggesuserscontext("solr_highlight") test_that("solr_highlight works", { skip_on_cran() a <- conn_plos$highlight(params = list(q='alcohol', hl.fl = 'abstract', rows=10)) Sys.sleep(2) b <- conn_plos$highlight(params = list(q='alcohol', hl.fl = c('abstract','title'), rows=3)) # correct dimensions expect_that(NROW(a), equals(10)) expect_that(NCOL(a), equals(2)) expect_that(NROW(b), equals(3)) expect_that(NCOL(b), equals(3)) # correct classes expect_is(a, "tbl_df") expect_is(a$abstract, "character") expect_is(b, "tbl_df") expect_is(b$abstract, "character") expect_is(b$title, "character") }) test_that("solr_highlight old style works", { skip_on_cran() expect_is(solr_highlight(conn_plos, params = list(q='alcohol', hl.fl = 'abstract', rows=10)), "tbl_df" ) expect_is(solr_highlight(conn_plos, params = list(q='alcohol', hl.fl = 
c('abstract','title'), rows=3)), "tbl_df" ) }) solrium/tests/testthat/test-update_json.R0000644000176200001440000000167613167507346020407 0ustar liggesuserscontext("update_json") test_that("update_json works", { skip_on_cran() file <- system.file("examples", "books2.json", package = "solrium") if (!conn$collection_exists("books")) conn$collection_create("books") aa <- conn$update_json(files = file, name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_true(conn$collection_exists("books")) }) test_that("update_json works with old format", { skip_on_cran() file <- system.file("examples", "books2.json", package = "solrium") if (!conn$collection_exists("books")) conn$collection_create("books") aa <- update_json(conn, files = file, name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_true(conn$collection_exists("books")) }) test_that("update_json fails well", { skip_on_cran() expect_error(update_json(), "argument \"conn\" is missing") expect_error(update_json(5), "conn must be a SolrClient object") }) solrium/tests/testthat/test-add.R0000644000176200001440000000204613176461147016612 0ustar liggesuserscontext("add") test_that("add works with a list and data.frame", { skip_on_cran() if (!collection_exists(conn, "books")) { collection_create(conn, name = "books") } ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) aa <- add(ss, conn, name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_is(conn$get(c(1, 2), "books"), "list") expect_named(conn$get(c(1, 2), "books"), "response") df <- data.frame(id = c(67, 68), price = c(1000, 500000000)) aa <- add(df, conn, "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) }) test_that("add works with new interface", { skip_on_cran() ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) aa <- conn$add(ss, name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) }) test_that("add fails well", { skip_on_cran() expect_error(add(), "no applicable method") expect_error(add(5), "no applicable method") expect_error(add(mtcars, 4), "conn must be a SolrClient object") }) solrium/tests/testthat/test-update_atomic_json.R0000644000176200001440000000217713167507346021740 0ustar liggesuserscontext("update_atomic_json") test_that("update_atomic_json works", { skip_on_cran() if (!conn$collection_exists("books")) { conn$collection_delete("books") conn$collection_create("books") } file <- system.file("examples", "books2.json", package = "solrium") invisible(conn$update_json(file, "books")) # get a document res1 <- conn$get(ids = 343334534545, "books") # atomic update body <- '[{ "id": "343334534545", "genre_s": {"set": "mystery" }, "pages_i": {"inc": 1 } }]' aa <- conn$update_atomic_json(body, "books") # get the document after updating res2 <- conn$get(ids = 343334534545, "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_is(res1$response$docs, "data.frame") expect_equal(res1$response$docs$genre_s, "fantasy") expect_equal(res1$response$docs$pages_i, 384) expect_is(res2$response$docs, "data.frame") expect_equal(res2$response$docs$pages_i, 385) }) test_that("update_atomic_json fails well", { expect_error(update_atomic_json(), "argument \"conn\" is missing") expect_error(update_atomic_json(5), "conn must be a SolrClient object") }) solrium/tests/testthat/test-core_create.R0000644000176200001440000000167413167507346020345 0ustar liggesuserscontext("core_create") test_that("core_create works", { skip_on_cran() 
skip_on_travis() skip_if_not(is_not_in_cloud_mode(conn)) core_name <- "slamcore" # delete if exists if (conn$core_exists(core_name)) { invisible(conn$core_unload(core_name)) } # write files in preparation path <- sprintf("~/solr-7.0.0/server/solr/%s/conf", core_name) dir.create(path, recursive = TRUE, showWarnings = FALSE) files <- list.files("~/solr-7.0.0/server/solr/configsets/sample_techproducts_configs/conf/", full.names = TRUE) invisible(file.copy(files, path, recursive = TRUE)) # create the core aa <- suppressMessages(conn$core_create( name = core_name, instanceDir = core_name, configSet = "basic_configs")) expect_is(aa, "list") expect_is(aa$responseHeader, "list") # it worked expect_equal(aa$responseHeader$status, 0) # correct name expect_is(aa$core, "character") expect_equal(aa$core, core_name) }) solrium/tests/testthat/test-solr_facet.r0000644000176200001440000000507213176460645020247 0ustar liggesuserscontext("solr_facet") test_that("solr_facet works", { skip_on_cran() a <- conn_plos$facet(params = list(q='*:*', facet.field='journal')) Sys.sleep(2) # FIXME: this doesn't work anymore # b <- conn_plos$facet(params = list(q='*:*', facet.date='publication_date', # facet.date.start='NOW/DAY-90DAYS', facet.date.end='NOW', # facet.date.gap='+1DAY')) c <- conn_plos$facet(params = list(q='alcohol', facet.pivot='journal,subject', facet.pivot.mincount=10)) # correct dimenions expect_equal(length(a), 5) expect_equal(length(a$facet_queries), 0) expect_equal(NCOL(a$facet_fields$journal), 2) # expect_that(length(b), equals(5)) # expect_that(length(b$facet_dates), equals(1)) # expect_that(dim(b$facet_dates$publication_date), equals(c(6,2))) expect_equal(length(c), 5) expect_equal(names(c$facet_pivot), c('journal', 'journal,subject')) expect_equal(names(c$facet_pivot$journal), c('journal', 'count')) expect_equal(names(c$facet_pivot$`journal,subject`), c('journal', 'subject', 'count')) expect_true(min(unlist(c$facet_pivot$`journal,subject`$count)) >= 10) # correct classes expect_is(a, "list") # expect_is(b, "list") expect_is(c, "list") # expect_is(b$facet_dates, "list") # expect_is(b$facet_dates$publication_date, "data.frame") expect_is(c$facet_pivot, "list") expect_is(c$facet_pivot$journal, "data.frame") expect_is(c$facet_pivot$`journal,subject`, "data.frame") }) # test_that("faceting works against HathiTrust", { # # regular facet # a <- conn_hathi$facet(params = list(q = '*:*', facet.field = 'genre')) # # pivot facet # c <- conn_hathi$facet(params = list(q = '*:*', facet.pivot = 'genre,publisher', # facet.pivot.mincount = 10)) # expect_equal(length(a), 5) # expect_equal(length(a$facet_queries), 0) # expect_equal(NCOL(a$facet_fields$genre), 2) # expect_equal(length(c), 5) # expect_equal(names(c$facet_pivot), c('genre', 'genre,publisher')) # expect_named(c$facet_pivot$genre, c('genre', 'count')) # expect_named(c$facet_pivot$`genre,publisher`, c('genre', 'publisher', 'count')) # expect_true(min(unlist(c$facet_pivot$`genre,publisher`$count)) >= 10) # # correct classes # expect_is(a, "list") # expect_is(c, "list") # expect_is(c$facet_pivot, "list") # expect_is(c$facet_pivot$genre, "data.frame") # expect_is(c$facet_pivot$`genre,publisher`, "data.frame") # }) test_that("solr_facet old style works", { skip_on_cran() expect_is(solr_facet(conn_plos, params = list(q='*:*', facet.field='journal')), "list" ) }) solrium/tests/testthat/test-errors.R0000644000176200001440000000254513170226156017373 0ustar liggesusers# errors context("errors") test_that("setting errors level gives correct error classes", { 
skip_on_cran() expect_is(conn_simp, "SolrClient") expect_is(conn_comp, "SolrClient") expect_is(conn_simp$errors, "character") expect_is(conn_comp$errors, "character") }) test_that("setting errors level gives correct error values", { skip_on_cran() expect_equal(conn_plos$errors, "simple") expect_equal(conn_simp$errors, "simple") expect_equal(conn_comp$errors, "complete") }) test_that("setting error levels gives correct effect - simple errors", { skip_on_cran() expect_error(conn_simp$search(params = list(q = "*:*", rows = "asdf")), "rows should be a numeric or integer class value") expect_error(conn_simp$search(params = list(q = "*:*", rows = "asdf")), "rows should be a numeric or integer class value") }) test_that("setting error levels gives correct effect - complete errors", { skip_on_cran() expect_error(conn_comp$search(params = list(q = "*:*", rows = "asdf")), "rows should be a numeric or integer class value") expect_error(conn_comp$search(params = list(q = "*:*", start = "asdf")), "500 - For input string: \"asdf\"") expect_error(conn_comp$search(params = list(q = "*:*", sort = "down")), "400 - Can't determine a Sort Order \\(asc or desc\\) in sort spec 'down'") }) solrium/tests/testthat/test-update_csv.R0000644000176200001440000000163213167507346020221 0ustar liggesuserscontext("update_csv") df <- data.frame(id=1:3, name=c('red', 'blue', 'green')) write.csv(df, file="df.csv", row.names=FALSE, quote = FALSE) test_that("update_csv works", { skip_on_cran() if (!conn$collection_exists("books")) conn$collection_create("books") aa <- conn$update_csv("df.csv", name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_true(conn$collection_exists("books")) }) test_that("update_csv works with old format", { skip_on_cran() if (!conn$collection_exists("books")) conn$collection_create("books") aa <- update_csv(conn, "df.csv", name = "books") expect_is(aa, "list") expect_named(aa, c("responseHeader")) expect_true(conn$collection_exists("books")) }) test_that("update_csv fails well", { skip_on_cran() expect_error(update_csv(), "argument \"conn\" is missing") expect_error(update_csv(5), "conn must be a SolrClient object") }) solrium/tests/testthat/test-solr_search.r0000644000176200001440000000750613176460514020431 0ustar liggesuserscontext("solr_search") test_that("solr_search works", { skip_on_cran() a <- conn_plos$search(params = list(q='*:*', rows=2, fl='id')) Sys.sleep(2) b <- conn_plos$search(params = list(q='title:"ecology" AND body:"cell"', fl='title', rows=5)) Sys.sleep(2) # correct dimensions expect_that(length(a), equals(1)) expect_that(length(b), equals(1)) # correct classes expect_is(a, "data.frame") expect_is(b, "data.frame") expect_is( solr_search(conn_plos, params = list(q='*:*', rows=2, fl='id')), "tbl_df") expect_is( solr_search(conn_plos, params = list(q='title:"ecology" AND body:"cell"', fl='title', rows=5)), "tbl_df") }) test_that("solr_search fails well", { skip_on_cran() expect_error(conn_plos$search(params = list(q = "*:*", rows = "asdf")), "rows should be a numeric or integer") expect_error(solr_search(conn_plos, params = list(q = "*:*", rows = "asdf")), "rows should be a numeric or integer") expect_error(conn_plos$search(params = list(q = "*:*", sort = "down")), "400 - Can't determine a Sort Order \\(asc or desc\\) in sort spec 'down'") expect_error(conn_plos$search(params = list(q='*:*', fl=c('alm_twitterCount','id'), fq='alm_notafield:[5 TO 50]', rows=10)), "undefined field") expect_error(conn_plos$search(params = list(q = "*:*", wt = "foobar")), "wt 
must be one of: json, xml, csv") }) test_that("solr_search works with Dryad", { skip_on_cran() a <- conn_dryad$search(params = list(q = '*:*', rows = 2)) Sys.sleep(2) b <- conn_dryad$search(params = list(q = 'dc.title.en:ecology', rows = 5)) # correct dimensions expect_equal(NROW(a), 2) expect_equal(NROW(b), 5) # correct classes expect_is(a, "data.frame") expect_is(a, "tbl_df") expect_is(b, "data.frame") expect_is(b, "tbl_df") # correct content expect_true(all(grepl("ecolog", b$dc.title.en, ignore.case = TRUE))) # solr_search expect_is(solr_search(conn_dryad, params = list(q = '*:*', rows = 2)), "tbl_df") expect_is( solr_search(conn_dryad, params = list(q = 'dc.title.en:ecology', rows = 5)), "tbl_df") }) test_that("solr_search optimize max rows with lower boundary", { skip_on_cran() a <- conn_plos$search(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', a$id) b <- conn_plos$search(params = list(q=query, rows=1, fl='id')) cc <- conn_plos$search(params = list(q=query, rows=-1, fl='id')) expect_identical(b, cc) }) test_that("solr_search optimize max rows with upper boundary", { skip_on_cran() a <- conn_plos$search(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', a$id) b <- conn_plos$search(params = list(q=query, rows=1, fl='id')) c <- conn_plos$search(params = list(q=query, rows=50000, fl='id')) expect_identical(b, c) }) test_that("solr_search optimize max rows with rows higher than upper boundary", { skip_on_cran() a <- conn_plos$search(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', a$id) b <- conn_plos$search(params = list(q=query, rows=1, fl='id')) c <- conn_plos$search(params = list(q=query, rows=50001, fl='id')) expect_identical(b, c) }) test_that("solr_search optimize max rows with rows=31 and minOptimizedRows=30", { skip_on_cran() a <- conn_plos$search(params = list(q='*:*', rows=1, fl='id')) query <- paste0('id:', a$id) b <- conn_plos$search(params = list(q=query, rows=1, fl='id')) c <- conn_plos$search(params = list(q=query, rows=31, fl='id'), optimizeMaxRows=TRUE, minOptimizedRows=30) expect_identical(b, c) }) test_that("solr_search fails if optimize max rows is disabled with rows equal to -1", { skip_on_cran() expect_error( conn_plos$search(params = list(q='*:*', rows=-1, fl='id'), optimizeMaxRows=FALSE), "'rows' parameter cannot be negative" ) }) solrium/tests/testthat/test-solr_error.R0000644000176200001440000000246513167507346020261 0ustar liggesuserscontext("solr_error internal function") test_that("solr_error works when no errors", { skip_on_cran() aa <- conn_simp$search(params = list(q = '*:*', rows = 2, fl = 'id')) expect_equal(conn$errors, "simple") expect_is(aa, "data.frame") expect_is(aa$id, "character") aa <- solr_search(conn_simp, params = list(q = '*:*', rows = 2, fl = 'id')) expect_equal(conn$errors, "simple") expect_is(aa, "data.frame") expect_is(aa$id, "character") }) test_that("solr_error works when there should be errors - simple errors", { skip_on_cran() expect_equal(conn_simp$errors, "simple") expect_error(conn_simp$search(params = list(q = '*:*', rows = 5, sort = "things")), "Can't determine a Sort Order") }) test_that("solr_error works when there should be errors - complete errors", { skip_on_cran() expect_equal(conn_comp$errors, "complete") expect_error(conn_comp$search(params = list(q = '*:*', rows = 5, sort = "things")), "Can't determine a Sort Order") }) test_that("solr_error - test directly", { skip_on_cran() library(crul) res <- crul::HttpClient$new(url = 
"http://api.plos.org/search?wt=json&q=%22synthetic%20biology%22&rows=10&fl=id,title&sort=notasortoption")$get() expect_error(solrium:::solr_error(res), "Can't determine a Sort Order \\(asc or desc\\)") }) solrium/tests/testthat/test-solr_goup.R0000644000176200001440000000336313176460622020073 0ustar liggesuserscontext("solr_group") test_that("solr_group works", { skip_on_cran() a <- conn_plos$group(params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'))) Sys.sleep(2) b <- conn_plos$group(params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score','alm_twitterCount'), group.sort='alm_twitterCount desc')) Sys.sleep(2) out <- conn_plos$group(params = list(q='ecology', group.field=c('journal','article_type'), group.limit=3, fl='id'), raw=TRUE) Sys.sleep(2) c <- out d <- solr_parse(out, 'df') e <- conn_plos$group(params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), group.format='grouped', group.main='true')) suppressPackageStartupMessages(library('jsonlite', quietly = TRUE)) f <- jsonlite::fromJSON(out, FALSE) # correct dimensions expect_equal(NCOL(a), 5) expect_equal(NCOL(b), 6) expect_that(length(c), equals(1)) expect_that(length(d), equals(2)) expect_equal(NCOL(d$article_type), 4) expect_equal(NCOL(e), 4) expect_that(length(f), equals(1)) expect_that(length(f$grouped), equals(2)) # correct classes expect_is(a, "data.frame") expect_is(b, "data.frame") expect_is(c, "sr_group") expect_is(d, "list") expect_is(d$journal, "data.frame") expect_is(e, "data.frame") }) test_that("solr_group old style works", { skip_on_cran() expect_is(solr_group(conn_plos, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'))), "data.frame" ) expect_is(solr_group(conn_plos, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), group.format='grouped', group.main='true')), "data.frame" ) }) solrium/tests/testthat/test-collections.R0000644000176200001440000000254413176412506020376 0ustar liggesuserscontext("collections") test_that("collections works - no collections", { skip_on_cran() skip_if_not(is_in_cloud_mode(conn)) if (conn$collection_exists("books")) conn$collection_delete("books") if (conn$collection_exists("gettingstarted")) conn$collection_delete("gettingstarted") aa <- collections(conn) expect_is(aa, "character") expect_false("books" %in% aa) expect_false("gettingstarted" %in% aa) }) test_that("collections works - with some collections", { skip_on_cran() skip_if_not(is_in_cloud_mode(conn)) if (!conn$collection_exists("books")) conn$collection_create("books") if (!conn$collection_exists("gettingstarted")) conn$collection_create("gettingstarted") aa <- collections(conn) expect_is(aa, "character") expect_true("books" %in% aa) expect_true("gettingstarted" %in% aa) }) test_that("collections works - new way of using", { skip_on_cran() skip_if_not(is_in_cloud_mode(conn)) if (!conn$collection_exists("books")) conn$collection_create("books") if (!conn$collection_exists("gettingstarted")) conn$collection_create("gettingstarted") aa <- conn$collection_list() expect_is(aa, "list") expect_named(aa, c('responseHeader', 'collections')) }) test_that("collections fails well", { expect_error(collections(), "argument \"conn\" is missing") expect_error(collections(5), "conn must be a SolrClient") }) solrium/tests/testthat/test-solr_mlt.r0000644000176200001440000000644413176465105017761 0ustar liggesuserscontext("solr_mlt") test_that("solr_mlt works", { skip_on_cran() a <- 
conn_plos$mlt(params = list(q='*:*', mlt.count=2, mlt.fl='abstract', fl='score', fq="doc_type:full")) Sys.sleep(2) c <- conn_plos$mlt(params = list(q='ecology', mlt.fl='abstract', fl='title', rows=5)) Sys.sleep(2) out <- conn_plos$mlt(params = list(q='ecology', mlt.fl='abstract', fl='title', rows=2, wt="xml"), raw=TRUE) library("xml2") outxml <- read_xml(unclass(out)) outdf <- solr_parse(out, "df") # correct dimensions expect_equal(dim(a$docs), c(10,2)) expect_equal(dim(c$docs), c(5, 2)) expect_equal(length(c$mlt), 5) expect_equal(length(outxml), 2) expect_equal(dim(outdf$mlt[[1]]), c(5, 5)) # correct classes expect_is(a, "list") # expect_is(b, "list") expect_is(c, "list") expect_is(a$docs, "data.frame") # expect_is(b$mlt, "data.frame") expect_is(c$docs, "data.frame") expect_is(outxml, "xml_document") expect_is(outdf, "list") expect_is(outdf$mlt[[1]], "data.frame") }) test_that("solr_mlt old style works", { skip_on_cran() expect_is( solr_mlt(conn_plos, params = list(q='*:*', mlt.count=2, mlt.fl='abstract', fl='score', fq="doc_type:full")), "list" ) expect_is( solr_mlt(conn_plos, params = list(q='ecology', mlt.fl='abstract', fl='title', rows=5)), "list" ) }) test_that("solr_mlt optimize max rows with lower boundary", { skip_on_cran() a <- conn_plos$mlt(params = list(q='*:*', mlt.count=2, mlt.fl='abstract', rows=1)) query <- paste0('id:', a$docs$id) b <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=1)) cc <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=-1)) expect_identical(b, cc) }) test_that("solr_mlt optimize max rows with upper boundary", { skip_on_cran() a <- conn_plos$mlt(params = list(q='*:*', mlt.count=2, mlt.fl='abstract', rows=1)) query <- paste0('id:', a$docs$id) b <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=1)) c <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=50000)) expect_identical(b, c) }) test_that("solr_mlt optimize max rows with rows higher than upper boundary", { skip_on_cran() a <- conn_plos$mlt(params = list(q='ecology', mlt.count=2, mlt.fl='abstract', rows=1)) query <- paste0('id:', a$docs$id) b <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=1)) c <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=50001)) expect_identical(b, c) }) test_that("solr_mlt optimize max rows with rows=31 and minOptimizedRows=30", { skip_on_cran() a <- conn_plos$mlt(params = list(q='*:*', mlt.count=2, mlt.fl='abstract', rows=1)) query <- paste0('id:', a$docs$id) b <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=1)) c <- conn_plos$mlt(params = list(q=query, mlt.count=2, mlt.fl='abstract', rows=31), optimizeMaxRows=TRUE, minOptimizedRows=30) expect_identical(b, c) }) test_that("solr_mlt fails if optimize max rows is disabled with rows equal to -1", { skip_on_cran() expect_error( conn_plos$mlt(params = list(q='*:*', mlt.count=2, mlt.fl='abstract', rows=-1), optimizeMaxRows=FALSE), "'rows' parameter cannot be negative" ) }) solrium/tests/testthat/test-solr_stats.r0000644000176200001440000000477413176430764020332 0ustar liggesuserscontext("solr_stats") test_that("solr_stats works", { skip_on_cran() a <- conn_plos$stats(params = list(q='science', stats.field='counter_total_all'), raw=TRUE) Sys.sleep(2) b <- conn_plos$stats(params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume'))) Sys.sleep(2) c <- conn_plos$stats(params = list(q='ecology', 
stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume')), raw=TRUE) Sys.sleep(2) d <- solr_parse(c) # list e <- solr_parse(c, 'df') # data.frame # correct dimenions expect_equal(length(a), 1) expect_equal(length(b), 2) expect_equal(nrow(b$data), 2) expect_equal(NCOL(b$facet$counter_total_all$journal), 9) expect_equal(length(c), 1) expect_equal(length(d), 2) expect_equal(length(d$data$alm_twitterCount), 8) expect_equal(length(e$facet$alm_twitterCount), 2) expect_equal(NCOL(e$facet$alm_twitterCount$volume), 9) # classes expect_is(a, "sr_stats") expect_is(b, "list") expect_is(b$data, "data.frame") expect_is(b$facet$counter_total_all$journal, "data.frame") expect_is(c, "sr_stats") expect_equal(attr(c, "wt"), "json") expect_is(d, "list") expect_is(e, "list") # solr_stats expect_is( solr_stats(conn_plos, params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume'))), "list" ) expect_is( solr_stats(conn_plos, params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume')), raw=TRUE), "sr_stats" ) }) test_that("solr_stats works using wt=xml", { skip_on_cran() aa <- conn_plos$stats(params = list(q='science', wt="xml", stats.field='counter_total_all'), raw=TRUE) bb <- conn_plos$stats(params = list(q='science', wt="xml", stats.field='counter_total_all')) cc <- conn_plos$stats(params = list(q='science', wt="xml", stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume'))) # correct dimenions expect_equal(length(aa), 1) expect_equal(length(bb), 2) expect_equal(NROW(bb$data), 1) expect_named(cc$facet[[1]], c("volume", "journal")) expect_equal(length(cc), 2) # classes expect_is(aa, "sr_stats") expect_is(bb, "list") expect_is(cc, "list") expect_is(bb$data, "data.frame") expect_is(cc$facet[[1]][[1]], "data.frame") expect_equal(attr(aa, "wt"), "xml") }) solrium/tests/testthat/test-ping.R0000644000176200001440000000372013167507346017021 0ustar liggesuserscontext("ping - regular mode") test_that("ping works", { skip_on_cran() skip_if_not(!is_in_cloud_mode(conn)) if (!conn$core_exists("gettingstarted")) conn$core_create("gettingstarted") aa <- conn$ping(name = "gettingstarted") expect_is(aa, "list") expect_is(aa$responseHeader, "list") expect_equal(aa$responseHeader$status, 0) expect_equal(aa$responseHeader$params$q, "{!lucene}*:*") }) test_that("ping gives raw data correctly", { skip_on_cran() skip_if_not(!is_in_cloud_mode(conn)) expect_is(ping("gettingstarted", raw = TRUE), "ping") expect_is(ping("gettingstarted", raw = FALSE), "list") expect_is(ping("gettingstarted", wt = "xml", raw = TRUE), "ping") expect_is(ping("gettingstarted", wt = "xml", raw = FALSE), "xml_document") }) test_that("ping fails well", { skip_on_cran() skip_if_not(!is_in_cloud_mode(conn)) expect_equal(ping()$status, "not found") expect_equal(ping("adfdafs")$status, "not found") }) context("ping - cloud mode") test_that("ping works", { skip_on_cran() skip_if_not(is_in_cloud_mode(conn)) if (!conn$collection_exists("gettingstarted")) { conn$collection_create("gettingstarted") } aa <- conn$ping(name = "gettingstarted") expect_is(aa, "list") expect_is(aa$responseHeader, "list") expect_equal(aa$responseHeader$status, 0) expect_equal(aa$responseHeader$params$q, "{!lucene}*:*") }) test_that("ping gives raw data correctly", { skip_on_cran() skip_if_not(is_in_cloud_mode(conn)) expect_is(ping(conn, "gettingstarted", raw = TRUE), "ping") expect_is(ping(conn, "gettingstarted", raw = 
FALSE), "list") expect_is(ping(conn, "gettingstarted", wt = "xml", raw = TRUE), "ping") expect_is(ping(conn, "gettingstarted", wt = "xml", raw = FALSE), "xml_document") }) test_that("ping fails well", { skip_on_cran() skip_if_not(is_in_cloud_mode(conn)) expect_error(conn$ping()$status, "argument \"name\" is missing") expect_equal(conn$ping("adfdafs")$status, "not found") }) solrium/tests/test-all.R0000644000176200001440000000005212600402073014745 0ustar liggesuserslibrary('testthat') test_check('solrium') solrium/tests/cloud_mode/0000755000176200001440000000000012600402073015212 5ustar liggesuserssolrium/tests/cloud_mode/test-add.R0000644000176200001440000000122512600426466017056 0ustar liggesuserscontext("add documents") # Using with Solr Cloud mode test_that("adding documents from a ", { solr_connect() # setup pinged <- ping(name = "helloWorld", verbose = FALSE)$status if (pinged != "OK") collection_create(name = "helloWorld", numShards = 2) # list works ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) list_out <- add(ss, "helloWorld") expect_is(list_out, "list") expect_equal(list_out$responseHeader$status, 0) # data.frame works df <- data.frame(id = c(67, 68), price = c(1000, 500000000)) df_out <- add(df, "helloWorld") expect_is(df_out, "list") expect_equal(df_out$responseHeader$status, 0) }) solrium/tests/cloud_mode/test-collections.R0000644000176200001440000000117112600402073020630 0ustar liggesuserscontext("collections management") # Using with Solr Cloud mode test_that("adding a collection works", { solr_connect() ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) # setup pinged <- ping(name = "helloWorld", verbose = FALSE)$status if (pinged != "OK") collection_delete(name = "helloWorld") # add collection list_out <- add(ss, "helloWorld") expect_is(list_out, "list") expect_equal(list_out$responseHeader$status, 0) }) test_that("adding a collection fails well", { solr_connect() expect_error(collection_create(name = "helloWorld", verbose = FALSE), "collection already exists") }) solrium/NAMESPACE0000644000176200001440000000466413167507346013211 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(add,data.frame) S3method(add,list) S3method(solr_parse,default) S3method(solr_parse,ping) S3method(solr_parse,sr_all) S3method(solr_parse,sr_facet) S3method(solr_parse,sr_group) S3method(solr_parse,sr_high) S3method(solr_parse,sr_mlt) S3method(solr_parse,sr_search) S3method(solr_parse,sr_stats) S3method(solr_parse,update) export(SolrClient) export(add) export(collection_addreplica) export(collection_addreplicaprop) export(collection_addrole) export(collection_balanceshardunique) export(collection_clusterprop) export(collection_clusterstatus) export(collection_create) export(collection_createalias) export(collection_createshard) export(collection_delete) export(collection_deletealias) export(collection_deletereplica) export(collection_deletereplicaprop) export(collection_deleteshard) export(collection_exists) export(collection_list) export(collection_migrate) export(collection_overseerstatus) export(collection_rebalanceleaders) export(collection_reload) export(collection_removerole) export(collection_requeststatus) export(collection_splitshard) export(collections) export(commit) export(config_get) export(config_overlay) export(config_params) export(config_set) export(core_create) export(core_exists) export(core_mergeindexes) export(core_reload) export(core_rename) export(core_requeststatus) export(core_split) export(core_status) export(core_swap) 
export(core_unload) export(cores) export(delete_by_id) export(delete_by_query) export(is.sr_facet) export(is.sr_high) export(is.sr_search) export(ping) export(schema) export(solr_all) export(solr_facet) export(solr_get) export(solr_group) export(solr_highlight) export(solr_mlt) export(solr_optimize) export(solr_parse) export(solr_search) export(solr_stats) export(update_atomic_json) export(update_atomic_xml) export(update_csv) export(update_json) export(update_xml) importFrom(R6,R6Class) importFrom(crul,HttpClient) importFrom(dplyr,bind_rows) importFrom(jsonlite,fromJSON) importFrom(plyr,rbind.fill) importFrom(tibble,add_column) importFrom(tibble,as_data_frame) importFrom(tibble,as_tibble) importFrom(tibble,data_frame) importFrom(utils,URLdecode) importFrom(utils,head) importFrom(utils,modifyList) importFrom(utils,read.table) importFrom(xml2,read_xml) importFrom(xml2,xml_attr) importFrom(xml2,xml_attrs) importFrom(xml2,xml_children) importFrom(xml2,xml_find_all) importFrom(xml2,xml_find_first) importFrom(xml2,xml_name) importFrom(xml2,xml_text) solrium/NEWS.md0000644000176200001440000000472413176475551013067 0ustar liggesuserssolrium 1.0.0 ============= This is v1, indicating breaking changes from the previous version! ### NEW FEATURES * Package has been reworked to allow control over what parameters are sent as query parameters and which as body. If only query parameters given, we do a `GET` request, but if any body parameters given (even if query params given) we do a `POST` request. This means that all `solr_*` functions have more or less the same parameters, and you now pass query parameters to `params` and body parameters to `body`. This definitely breaks previous code, apologies for that, but the bump in major version is a big indicator of the breakage. * As part of overhaual, moved to using an `R6` setup for the Solr connection object. The connection object deals with connection details, and you can call all methods on the object created. Additionally, you can simply pass the connection object to standalone methods. This change means you can create connection objects to >1 Solr instance, so you can use many Solr instances in one R session. (#100) * gains new functions `update_atomic_json` and `update_atomic_xml` for doing atomic updates (#97) thanks @yinghaoh * `solr_search` and `solr_all` gain attributes that include `numFound`, `start`, and `maxScore` (#94) * `solr_search`/`solr_all`/`solr_mlt` gain new feature where we automically check for and adjust `rows` parameter for you if you allow us to. You can toggle this behavior and you can set a minimum number for rows to be optimized with `minOptimizedRows`. See (#102) (#104) (#105) for discussion. Thanks @1havran ### MINOR IMPROVEMENTS * Replaced `httr` with `crul`. 
Should only be noticeable with respect to specifying curl options (#98) * Added more tests (#56) * `optimize` renamed to `solr_optimize` (#107) * now `solr_facet` fails better when no `facet.*` fields given (#103) ### BUG FIXES * Fixed `solr_highlight` parsing to data.frame bug (#109) solrium 0.4.0 ============= ### MINOR IMPROVEMENTS * Change `dplyr::rbind_all()` (deprecated) to `dplyr::bind_rows()` (#90) * Added additional examples of using pivot facetting to `solr_facet()` (#91) * Fix to `solr_group()` (#92) * Replaced dependency `XML` with `xml2` (#57) * Added examples and tests for a few more public Solr instances (#30) * Now using `tibble` to give back compact data.frame's * namespace all base package calls * Many changes to internal parsers to use `xml2` instead of `XML`, and improvements solrium 0.3.0 ============= ### NEW FEATURES * released to CRAN solrium/R/0000755000176200001440000000000013167507346012161 5ustar liggesuserssolrium/R/collection_delete.R0000644000176200001440000000117613176213150015751 0ustar liggesusers#' Add a collection #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core to be created. Required #' @param raw (logical) If \code{TRUE}, returns raw data #' @param callopts curl options passed on to \code{\link[crul]{HttpClient}} #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' if (!conn$collection_exists("helloWorld")) { #' conn$collection_create(name = "helloWorld") #' } #' #' collection_delete(conn, name = "helloWorld") #' } collection_delete <- function(conn, name, raw = FALSE, callopts = list()) { conn$collection_delete(name, raw, callopts) } solrium/R/solr_all.r0000644000176200001440000000511113176421214014136 0ustar liggesusers#' @title All purpose search #' #' @description Includes documents, facets, groups, mlt, stats, and highlights #' #' @export #' @template search #' @template optimizerows #' @param conn A solrium connection object, see [SolrClient] #' @param params (list) a named list of parameters, results in a GET reqeust #' as long as no body parameters given #' @param body (list) a named list of parameters, if given a POST request #' will be performed #' @return XML, JSON, a list, or data.frame #' @seealso [solr_highlight()], [solr_facet()] #' @references See for #' more information. 
#' @examples \dontrun{ #' # connect #' (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' #' solr_all(cli, params = list(q='*:*', rows=2, fl='id')) #' #' # facets #' solr_all(cli, params = list(q='*:*', rows=2, fl='id', facet="true", #' facet.field="journal")) #' #' # mlt #' solr_all(cli, params = list(q='ecology', rows=2, fl='id', mlt='true', #' mlt.count=2, mlt.fl='abstract')) #' #' # facets and mlt #' solr_all(cli, params = list(q='ecology', rows=2, fl='id', facet="true", #' facet.field="journal", mlt='true', mlt.count=2, mlt.fl='abstract')) #' #' # stats #' solr_all(cli, params = list(q='ecology', rows=2, fl='id', stats='true', #' stats.field='counter_total_all')) #' #' # facets, mlt, and stats #' solr_all(cli, params = list(q='ecology', rows=2, fl='id', facet="true", #' facet.field="journal", mlt='true', mlt.count=2, mlt.fl='abstract', #' stats='true', stats.field='counter_total_all')) #' #' # group #' solr_all(cli, params = list(q='ecology', rows=2, fl='id', group='true', #' group.field='journal', group.limit=3)) #' #' # facets, mlt, stats, and groups #' solr_all(cli, params = list(q='ecology', rows=2, fl='id', facet="true", #' facet.field="journal", mlt='true', mlt.count=2, mlt.fl='abstract', #' stats='true', stats.field='counter_total_all', group='true', #' group.field='journal', group.limit=3)) #' #' # using wt = xml #' solr_all(cli, params = list(q='*:*', rows=50, fl=c('id','score'), #' fq='doc_type:full', wt="xml"), raw=TRUE) #' } solr_all <- function(conn, name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) { conn$all(name = name, params = params, body = body, callopts = callopts, raw = raw, parsetype = parsetype, concat = concat, optimizeMaxRows = optimizeMaxRows, minOptimizedRows = minOptimizedRows, ...) } solrium/R/collection_overseerstatus.R0000644000176200001440000000132113167507346017612 0ustar liggesusers#' @title Get overseer status #' #' @description Returns the current status of the overseer, performance #' statistics of various overseer APIs as well as last 10 failures per #' operation type. #' #' @export #' @inheritParams collection_create #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' conn$collection_overseerstatus() #' res <- conn$collection_overseerstatus() #' res$responseHeader #' res$leader #' res$overseer_queue_size #' res$overseer_work_queue_size #' res$overseer_operations #' res$collection_operations #' res$overseer_queue #' res$overseer_internal_queue #' res$collection_queue #' } collection_overseerstatus <- function(conn, raw = FALSE, ...) { conn$collection_overseerstatus(raw, ...) } solrium/R/delete.R0000644000176200001440000000405013167507346013545 0ustar liggesusers#' Delete documents by ID or query #' #' @name delete #' @param conn A solrium connection object, see [SolrClient] #' @param ids Document IDs, one or more in a vector or list #' @param name (character) A collection or core name. Required. #' @param query Query to use to delete documents #' @param commit (logical) If `TRUE`, documents immediately searchable. #' Deafult: `TRUE` #' @param commit_within (numeric) Milliseconds to commit the change, the #' document will be added within that time. Default: `NULL` #' @param overwrite (logical) Overwrite documents with matching keys. #' Default: `TRUE` #' @param boost (numeric) Boost factor. Default: `NULL` #' @param wt (character) One of json (default) or xml. If json, uses #' [jsonlite::fromJSON()] to parse. 
If xml, uses [xml2::read_xml()] to #' parse #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' `wt` param #' @param ... curl options passed on to [crul::HttpClient] #' @details We use json internally as data interchange format for this function. #' @examples \dontrun{ #' (cli <- SolrClient$new()) #' #' # add some documents first #' ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) #' cli$add(ss, name = "gettingstarted") #' #' # Now, delete them #' # Delete by ID #' cli$delete_by_id(ids = 1, "gettingstarted") #' ## Many IDs #' cli$delete_by_id(ids = c(3, 4), "gettingstarted") #' #' # Delete by query #' cli$delete_by_query(query = "manu:bank", "gettingstarted") #' } #' @export #' @name delete delete_by_id <- function(conn, ids, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { check_sr(conn) conn$delete_by_id(ids, name, commit, commit_within, overwrite, boost, wt, raw, ...) } #' @export #' @name delete delete_by_query <- function(conn, query, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { check_sr(conn) conn$delete_by_query(query, name, commit, commit_within, overwrite, boost, wt, raw, ...) } solrium/R/collection_deletereplicaprop.R0000644000176200001440000000347013176214130020210 0ustar liggesusers#' @title Delete a replica property #' #' @description Deletes an arbitrary property from a particular replica. #' #' @export #' @inheritParams collection_create #' @param shard (character) Required. The name of the shard the replica #' belongs to. #' @param replica (character) Required. The replica, e.g. core_node1. #' @param property (character) Required. The property to delete. Note: this #' will have the literal 'property.' prepended to distinguish it from #' system-maintained properties. 
So these two forms are equivalent: #' `property=special` and `property=property.special` #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("deleterep")) { #' conn$collection_create(name = "deleterep") #' # OR bin/solr create -c deleterep #' } #' #' # status #' conn$collection_clusterstatus()$cluster$collections$deleterep$shards #' #' # add the value bar to the property foo #' conn$collection_addreplicaprop(name = "deleterep", shard = "shard1", #' replica = "core_node1", property = "foo", property.value = "bar") #' #' # check status #' conn$collection_clusterstatus()$cluster$collections$deleterep$shards #' conn$collection_clusterstatus()$cluster$collections$deleterep$shards$shard1$replicas$core_node1 #' #' # delete replica property #' conn$collection_deletereplicaprop(name = "deleterep", shard = "shard1", #' replica = "core_node1", property = "foo") #' #' # check status - foo should be gone #' conn$collection_clusterstatus()$cluster$collections$deleterep$shards$shard1$replicas$core_node1 #' } collection_deletereplicaprop <- function(conn, name, shard, replica, property, raw = FALSE, callopts=list()) { conn$collection_deletereplicaprop(name, shard, replica, property, raw, callopts) } solrium/R/solr_facet.r0000644000176200001440000001134013176432220014450 0ustar liggesusers#' @title Faceted search #' #' @description Returns only facet items #' #' @export #' @template facet #' @param conn A solrium connection object, see [SolrClient] #' @param params (list) a named list of parameters, results in a GET reqeust #' as long as no body parameters given #' @param body (list) a named list of parameters, if given a POST request #' will be performed #' @return Raw json or xml, or a list of length 4 parsed elements #' (usually data.frame's). #' @seealso [solr_search()], [solr_highlight()], [solr_parse()] #' @references See for #' more information on faceting. 
#' @examples \dontrun{ #' # connect - local Solr instance #' (cli <- SolrClient$new()) #' cli$facet("gettingstarted", params = list(q="*:*", facet.field='name')) #' cli$facet("gettingstarted", params = list(q="*:*", facet.field='name'), #' callopts = list(verbose = TRUE)) #' cli$facet("gettingstarted", body = list(q="*:*", facet.field='name'), #' callopts = list(verbose = TRUE)) #' #' # Facet on a single field #' solr_facet(cli, "gettingstarted", params = list(q='*:*', facet.field='name')) #' #' # Remote instance #' (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' #' # Facet on multiple fields #' solr_facet(cli, params = list(q='alcohol', #' facet.field = c('journal','subject'))) #' #' # Using mincount #' solr_facet(cli, params = list(q='alcohol', facet.field='journal', #' facet.mincount='500')) #' #' # Using facet.query to get counts #' solr_facet(cli, params = list(q='*:*', facet.field='journal', #' facet.query=c('cell','bird'))) #' #' # Using facet.pivot to simulate SQL group by counts #' solr_facet(cli, params = list(q='alcohol', facet.pivot='journal,subject', #' facet.pivot.mincount=10)) #' ## two or more fields are required - you can pass in as a single #' ## character string #' solr_facet(cli, params = list(q='*:*', facet.pivot = "journal,subject", #' facet.limit = 3)) #' ## Or, pass in as a vector of length 2 or greater #' solr_facet(cli, params = list(q='*:*', facet.pivot = c("journal", "subject"), #' facet.limit = 3)) #' #' # Date faceting #' solr_facet(cli, params = list(q='*:*', facet.date='publication_date', #' facet.date.start='NOW/DAY-5DAYS', facet.date.end='NOW', #' facet.date.gap='+1DAY')) #' ## two variables #' solr_facet(cli, params = list(q='*:*', #' facet.date=c('publication_date', 'timestamp'), #' facet.date.start='NOW/DAY-5DAYS', facet.date.end='NOW', #' facet.date.gap='+1DAY')) #' #' # Range faceting #' solr_facet(cli, params = list(q='*:*', facet.range='counter_total_all', #' facet.range.start=5, facet.range.end=1000, facet.range.gap=10)) #' #' # Range faceting with > 1 field, same settings #' solr_facet(cli, params = list(q='*:*', #' facet.range=c('counter_total_all','alm_twitterCount'), #' facet.range.start=5, facet.range.end=1000, facet.range.gap=10)) #' #' # Range faceting with > 1 field, different settings #' solr_facet(cli, params = list(q='*:*', #' facet.range=c('counter_total_all','alm_twitterCount'), #' f.counter_total_all.facet.range.start=5, #' f.counter_total_all.facet.range.end=1000, #' f.counter_total_all.facet.range.gap=10, #' f.alm_twitterCount.facet.range.start=5, #' f.alm_twitterCount.facet.range.end=1000, #' f.alm_twitterCount.facet.range.gap=10)) #' #' # Get raw json or xml #' ## json #' solr_facet(cli, params = list(q='*:*', facet.field='journal'), raw=TRUE) #' ## xml #' solr_facet(cli, params = list(q='*:*', facet.field='journal', wt='xml'), #' raw=TRUE) #' #' # Get raw data back, and parse later, same as what goes on internally if #' # raw=FALSE (Default) #' out <- solr_facet(cli, params = list(q='*:*', facet.field='journal'), #' raw=TRUE) #' solr_parse(out) #' out <- solr_facet(cli, params = list(q='*:*', facet.field='journal', #' wt = 'xml'), raw=TRUE) #' solr_parse(out) #' #' # Using the USGS BISON API (https://bison.usgs.gov/#solr) #' ## The occurrence endpoint #' (cli <- SolrClient$new(host = "bison.usgs.gov", scheme = "https", #' path = "solr/occurrences/select", port = NULL)) #' solr_facet(cli, params = list(q='*:*', facet.field='year')) #' solr_facet(cli, params = list(q='*:*', 
facet.field='computedStateFips')) #' #' # using a proxy #' # cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL, #' # proxy = list(url = "http://54.195.48.153:8888")) #' # solr_facet(cli, params = list(facet.field='journal'), #' # callopts=list(verbose=TRUE)) #' } solr_facet <- function(conn, name = NULL, params = list(q = '*:*'), body = NULL, callopts = list(), raw = FALSE, parsetype = 'df', concat = ',', ...) { conn$facet(name = name, params = params, body = body, callopts = callopts, raw = raw, parsetype = parsetype, concat = concat, ...) } solrium/R/solr_stats.r0000644000176200001440000000446413176430775014552 0ustar liggesusers#' @title Solr stats #' #' @description Returns only stat items #' #' @export #' @template stats #' @param conn A solrium connection object, see [SolrClient] #' @param params (list) a named list of parameters, results in a GET reqeust #' as long as no body parameters given #' @param body (list) a named list of parameters, if given a POST request #' will be performed #' @return XML, JSON, a list, or data.frame #' @seealso [solr_highlight()], [solr_facet()], [solr_search()], [solr_mlt()] #' @references See for #' more information on Solr stats. #' @examples \dontrun{ #' # connect #' (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' #' # get stats #' solr_stats(cli, params = list(q='science', stats.field='counter_total_all'), #' raw=TRUE) #' solr_stats(cli, params = list(q='title:"ecology" AND body:"cell"', #' stats.field=c('counter_total_all','alm_twitterCount'))) #' solr_stats(cli, params = list(q='ecology', #' stats.field=c('counter_total_all','alm_twitterCount'), #' stats.facet='journal')) #' solr_stats(cli, params = list(q='ecology', #' stats.field=c('counter_total_all','alm_twitterCount'), #' stats.facet=c('journal','volume'))) #' #' # Get raw data, then parse later if you feel like it #' ## json #' out <- solr_stats(cli, params = list(q='ecology', #' stats.field=c('counter_total_all','alm_twitterCount'), #' stats.facet=c('journal','volume')), raw=TRUE) #' library("jsonlite") #' jsonlite::fromJSON(out) #' solr_parse(out) # list #' solr_parse(out, 'df') # data.frame #' #' ## xml #' out <- solr_stats(cli, params = list(q='ecology', #' stats.field=c('counter_total_all','alm_twitterCount'), #' stats.facet=c('journal','volume'), wt="xml"), raw=TRUE) #' library("xml2") #' xml2::read_xml(unclass(out)) #' solr_parse(out) # list #' solr_parse(out, 'df') # data.frame #' #' # Get verbose http call information #' solr_stats(cli, params = list(q='ecology', stats.field='alm_twitterCount'), #' callopts=list(verbose=TRUE)) #' } solr_stats <- function(conn, name = NULL, params = list(q = '*:*', stats.field = NULL, stats.facet = NULL), body = NULL, callopts=list(), raw=FALSE, parsetype='df', ...) { conn$stats(name = name, params = params, body = body, callopts = callopts, raw = raw, parsetype = parsetype, ...) } solrium/R/solrium-package.R0000644000176200001440000000552513167507346015376 0ustar liggesusers#' General purpose R interface to Solr. #' #' This package has support for all the search endpoints, as well as a suite #' of functions for managing a Solr database, including adding and deleting #' documents. #' #' @section Important search functions: #' #' \itemize{ #' \item \code{\link{solr_search}} - General search, only returns documents #' \item \code{\link{solr_all}} - General search, including all non-documents #' in addition to documents: facets, highlights, groups, mlt, stats. 
#' \item \code{\link{solr_facet}} - Faceting only (w/o general search) #' \item \code{\link{solr_highlight}} - Highlighting only (w/o general search) #' \item \code{\link{solr_mlt}} - More like this (w/o general search) #' \item \code{\link{solr_group}} - Group search (w/o general search) #' \item \code{\link{solr_stats}} - Stats search (w/o general search) #' } #' #' @section Important Solr management functions: #' #' \itemize{ #' \item \code{\link{update_json}} - Add or delete documents using json in a #' file #' \item \code{\link{add}} - Add documents via an R list or data.frame #' \item \code{\link{delete_by_id}} - Delete documents by ID #' \item \code{\link{delete_by_query}} - Delete documents by query #' } #' #' @section Vignettes: #' #' See the vignettes for help \code{browseVignettes(package = "solrium")} #' #' @section Performance: #' #' \code{v0.2} and above of this package will have \code{wt=csv} as the default. #' This should give significant performance improvement over the previous #' default of \code{wt=json}, which pulled down json, parsed to an R list, #' then to a data.frame. With \code{wt=csv}, we pull down csv, and read that #' in directly to a data.frame. #' #' The http library we use, \pkg{crul}, sets gzip compression header by #' default. As long as compression is used server side, you're good to go on #' compression, which should be a good peformance boost. See #' \url{https://wiki.apache.org/solr/SolrPerformanceFactors#Query_Response_Compression} #' for notes on how to enable compression. #' #' There are other notes about Solr performance at #' \url{https://wiki.apache.org/solr/SolrPerformanceFactors} that can be #' used server side/in your Solr config, but aren't things to tune here in #' this R client. #' #' Let us know if there's any further performance improvements we can make. #' #' @importFrom utils URLdecode head modifyList read.table #' @importFrom crul HttpClient #' @importFrom xml2 read_xml xml_children xml_find_first xml_find_all #' xml_name xml_text xml_attr xml_attrs #' @importFrom jsonlite fromJSON #' @importFrom plyr rbind.fill #' @importFrom dplyr bind_rows #' @importFrom tibble data_frame as_data_frame as_tibble add_column #' @importFrom R6 R6Class #' @name solrium-package #' @aliases solrium #' @docType package #' @author Scott Chamberlain \email{myrmecocystus@@gmail.com} #' @keywords package NULL solrium/R/core_requeststatus.R0000644000176200001440000000103613167507346016250 0ustar liggesusers#' Request status of asynchronous CoreAdmin API call #' #' @export #' #' @param requestid The name of one of the cores to be removed. Required #' @inheritParams core_create #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' #' # FIXME: not tested yet... #' # (conn <- SolrClient$new()) #' # conn$core_requeststatus(requestid = 1) #' } core_requeststatus <- function(conn, requestid, raw = FALSE, callopts = list()) { conn$core_requeststatus(requestid, raw, callopts) } solrium/R/config_params.R0000644000176200001440000000411013176256011015076 0ustar liggesusers#' Set Solr configuration params #' #' @export #' #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core. If not given, all cores. #' @param param (character) Name of a parameter #' @param set (list) List of key:value pairs of what to set. Create or overwrite #' a parameter set map. Default: NULL (nothing passed) #' @param unset (list) One or more character strings of keys to unset. 
Default: NULL #' (nothing passed) #' @param update (list) List of key:value pairs of what to update. Updates a parameter #' set map. This essentially overwrites the old parameter set, so all parameters must #' be sent in each update request. #' @param ... curl options passed on to [crul::HttpClient] #' @return A list with response from server #' @details The Request Parameters API allows creating parameter sets that can #' override or take the place of parameters defined in solrconfig.xml. It is #' really another endpoint of the Config API instead of a separate API, and #' has distinct commands. It does not replace or modify any sections of #' solrconfig.xml, but instead provides another approach to handling parameters #' used in requests. It behaves in the same way as the Config API, by storing #' parameters in another file that will be used at runtime. In this case, #' the parameters are stored in a file named params.json. This file is kept in #' ZooKeeper or in the conf directory of a standalone Solr instance. #' @examples \dontrun{ #' # start Solr in standard or Cloud mode #' # connect #' (conn <- SolrClient$new()) #' #' # set a parameter set #' myFacets <- list(myFacets = list(facet = TRUE, facet.limit = 5)) #' config_params(conn, "gettingstarted", set = myFacets) #' #' # check a parameter #' config_params(conn, "gettingstarted", param = "myFacets") #' } config_params <- function(conn, name, param = NULL, set = NULL, unset = NULL, update = NULL, ...) { conn$config_params(name, param, set, unset, update, ...) } name_by <- function(x, y) { if (is.null(x)) { NULL } else { stats::setNames(list(y = x), y) } } solrium/R/ping.R0000644000176200001440000000264413167507346013247 0ustar liggesusers#' Ping a Solr instance #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) Name of a collection or core. Required. #' @param wt (character) One of json (default) or xml. If json, uses #' [jsonlite::fromJSON()] to parse. If xml, uses [xml2::read_xml)] to parse #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' `wt` param #' @param ... curl options passed on to [crul::HttpClient] #' #' @return if `wt="xml"` an object of class `xml_document`, if #' `wt="json"` an object of class `list` #' #' @details You likely may not be able to run this function against many public #' Solr services as they hopefully don't expose their admin interface to the #' public, but works locally. #' #' @examples \dontrun{ #' # start Solr, in your CLI, run: `bin/solr start -e cloud -noprompt` #' # after that, if you haven't run `bin/post -c gettingstarted docs/` yet, #' # do so #' #' # connect: by default we connect to localhost, port 8983 #' (cli <- SolrClient$new()) #' #' # ping the gettingstarted index #' cli$ping("gettingstarted") #' ping(cli, "gettingstarted") #' ping(cli, "gettingstarted", wt = "xml") #' ping(cli, "gettingstarted", verbose = FALSE) #' ping(cli, "gettingstarted", raw = TRUE) #' #' ping(cli, "gettingstarted", wt="xml", verbose = TRUE) #' } ping <- function(conn, name, wt = 'json', raw = FALSE, ...) { conn$ping(name = name, wt = wt, raw = raw, ...) } solrium/R/commit.R0000644000176200001440000000255013176255773013602 0ustar liggesusers#' Commit #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) A collection or core name. Required. #' @param expunge_deletes merge segments with deletes away. 
Default: `FALSE` #' @param wait_searcher block until a new searcher is opened and registered as #' the main query searcher, making the changes visible. Default: `TRUE` #' @param soft_commit perform a soft commit - this will refresh the 'view' of #' the index in a more performant manner, but without "on-disk" guarantees. #' Default: `FALSE` #' @param wt (character) One of json (default) or xml. If json, uses #' [jsonlite::fromJSON()] to parse. If xml, uses [xml2::read_xml()] to parse #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' `wt` param #' @param ... curl options passed on to [crul::HttpClient] #' @references <> #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' conn$commit("gettingstarted") #' conn$commit("gettingstarted", wait_searcher = FALSE) #' #' # get xml back #' conn$commit("gettingstarted", wt = "xml") #' ## raw xml #' conn$commit("gettingstarted", wt = "xml", raw = TRUE) #' } commit <- function(conn, name, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, wt = 'json', raw = FALSE, ...) { conn$commit(name, expunge_deletes, wait_searcher, soft_commit, wt, raw, ...) } solrium/R/update_csv.R0000644000176200001440000000354413176205755014446 0ustar liggesusers#' Update documents with CSV data #' #' @export #' @family update #' @template csvcreate #' @param conn A solrium connection object, see [SolrClient] #' @param files Path to a single file to load into Solr #' @param name (character) Name of the core or collection #' @param wt (character) One of json (default) or xml. If json, uses #' \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses #' \code{\link[xml2]{read_xml}} to parse #' @param raw (logical) If TRUE, returns raw data in format specified by #' \code{wt} param #' @param ... curl options passed on to [crul::HttpClient] #' @note SOLR v1.2 was first version to support csv. See #' \url{https://issues.apache.org/jira/browse/SOLR-66} #' @examples \dontrun{ #' # start Solr: bin/solr start -f -c -p 8983 #' #' # connect #' (cli <- SolrClient$new()) #' #' if (!cli$collection_exists("helloWorld")) { #' cli$collection_create(name = "helloWorld", numShards = 2) #' } #' #' df <- data.frame(id=1:3, name=c('red', 'blue', 'green')) #' write.csv(df, file="df.csv", row.names=FALSE, quote = FALSE) #' conn$update_csv("df.csv", "helloWorld", verbose = TRUE) #' #' # give back raw xml #' conn$update_csv("df.csv", "helloWorld", wt = "xml") #' ## raw json #' conn$update_csv("df.csv", "helloWorld", wt = "json", raw = TRUE) #' } update_csv <- function(conn, files, name, separator = ',', header = TRUE, fieldnames = NULL, skip = NULL, skipLines = 0, trim = FALSE, encapsulator = NULL, escape = NULL, keepEmpty = FALSE, literal = NULL, map = NULL, split = NULL, rowid = NULL, rowidOffset = NULL, overwrite = NULL, commit = NULL, wt = 'json', raw = FALSE, ...) { check_sr(conn) conn$update_csv(files, name, separator, header, fieldnames, skip, skipLines, trim, encapsulator, escape, keepEmpty, literal, map, split, rowid, rowidOffset, overwrite, commit, wt, raw, ...) 
} solrium/R/core_exists.R0000644000176200001440000000135313167507346014635 0ustar liggesusers#' Check if a core exists #' #' @export #' #' @inheritParams core_create #' @details Simply calls [core_status()] internally #' @return A single boolean, `TRUE` or `FALSE` #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or create as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # exists #' conn$core_exists("gettingstarted") #' #' # doesn't exist #' conn$core_exists("hhhhhh") #' } core_exists <- function(conn, name, callopts=list()) { tmp <- suppressMessages(core_status(conn, name = name, callopts = callopts)) length(tmp$status[[1]]) > 0 } solrium/R/search_route_keys.R0000644000176200001440000000613113167507346016023 0ustar liggesusersfilter_keys <- function(lst, keys) lst[names(lst) %in% keys] # nocov start keys_search <- c("q", "sort", "start", "rows", "pageDoc", "pageScore", "fl", "defType", "timeAllowed", "qt", "wt", "NOW", "TZ", "echoHandler", "echoParams") keys_facet <- c( "q", "facet.query", "facet.field", "facet.prefix", "facet.sort", "facet.limit", "facet.offset", "facet.mincount", "facet.missing", "facet.method", "facet.enum.cache.minDf", "facet.threads", "facet.date", "facet.date.start", "facet.date.end", "facet.date.gap", "facet.date.hardend", "facet.date.other", "facet.date.include", "facet.range", "facet.range.start", "facet.range.end", "facet.range.gap", "facet.range.hardend", "facet.range.other", "facet.range.include", "facet.pivot", "facet.pivot.mincount", "start", "rows", "key", "wt") keys_stats <- c("q", "stats.field", "stats.facet", "start", "rows", "key", "wt") keys_high <- c("fl", "fq", "hl", "hl.fl", "hl.alternateField", "hl.alternateField ", "hl.boundaryScanner", "hl.boundaryScanner ", "hl.bs.chars", "hl.bs.chars", "hl.bs.country", "hl.bs.country ", "hl.bs.language", "hl.bs.language ", "hl.bs.maxScan", "hl.bs.maxScan", "hl.bs.type", "hl.bs.type ", "hl.formatter", "hl.formatter", "hl.fragListBuilder", "hl.fragListBuilder ", "hl.fragmenter", "hl.fragmenter ", "hl.fragmentsBuilder", "hl.fragmentsBuilder", "hl.fragsize", "hl.highlightMultiTerm", "hl.highlightMultiTerm", "hl.maxAlternateFieldLength", "hl.maxAlternateFieldLength", "hl.maxAnalyzedChars", "hl.maxAnalyzedChars", "hl.maxMultiValuedToExamine", "hl.maxMultiValuedToExamine", "hl.maxMultiValuedToMatch", "hl.maxMultiValuedToMatch", "hl.mergeContiguous", "hl.mergeContiguous", "hl.preserveMulti", "hl.preserveMulti", "hl.regex.maxAnalyzedChars", "hl.regex.maxAnalyzedChars", "hl.regex.pattern", "hl.regex.pattern ", "hl.regex.slop", "hl.regex.slop", "hl.requireFieldMatch", "hl.requireFieldMatch", "hl.simple.post", "hl.simple.post", "hl.simple.pre", "hl.simple.pre", "hl.snippets", "hl.useFastVectorHighlighter", "hl.useFastVectorHighlighter", "hl.usePhraseHighlighter", "hl.usePhraseHighlighter", "q", "rows", "start", "wt") keys_group <- c("group.query","group.field", 'q', 'start', 'rows', 'sort', 'fq', 'wt', 'group.limit', 'group.offset', 'group.sort', 'group.sort', 'group.format', 'group.func', 'group.main', 'group.ngroups', 'group.cache.percent', 'group.cache.percent', 'fl') keys_all <- c("q", "sort", "start", "rows", "pageDoc", "pageScore", "fl", "fq", "defType", "timeAllowed", "qt", "wt", "NOW", "TZ", "echoHandler") keys_mlt <- c("q", "fq", "fl", "mlt.count", "mlt.fl", "mlt.mintf", "mlt.mindf", "mlt.minwl", "mlt.maxwl", "mlt.maxqt", 
"mlt.maxntp", "mlt.boost", "mlt.qf", "start", "rows", "wt", "mlt") # nocov end solrium/R/solr_mlt.r0000644000176200001440000000371613176421107014174 0ustar liggesusers#' @title "more like this" search #' #' @description Returns only more like this items #' #' @export #' @template mlt #' @template optimizerows #' @param conn A solrium connection object, see [SolrClient] #' @param params (list) a named list of parameters, results in a GET reqeust #' as long as no body parameters given #' @param body (list) a named list of parameters, if given a POST request #' will be performed #' @return XML, JSON, a list, or data.frame #' @references See \url{http://wiki.apache.org/solr/MoreLikeThis} for more #' information. #' @examples \dontrun{ #' # connect #' (conn <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' #' # more like this search #' conn$mlt(params = list(q='*:*', mlt.count=2, mlt.fl='abstract', fl='score', #' fq="doc_type:full")) #' conn$mlt(params = list(q='*:*', rows=2, mlt.fl='title', mlt.mindf=1, #' mlt.mintf=1, fl='alm_twitterCount')) #' conn$mlt(params = list(q='title:"ecology" AND body:"cell"', mlt.fl='title', #' mlt.mindf=1, mlt.mintf=1, fl='counter_total_all', rows=5)) #' conn$mlt(params = list(q='ecology', mlt.fl='abstract', fl='title', rows=5)) #' solr_mlt(conn, params = list(q='ecology', mlt.fl='abstract', #' fl=c('score','eissn'), rows=5)) #' solr_mlt(conn, params = list(q='ecology', mlt.fl='abstract', #' fl=c('score','eissn'), rows=5, wt = "xml")) #' #' # get raw data, and parse later if needed #' out <- solr_mlt(conn, params=list(q='ecology', mlt.fl='abstract', fl='title', #' rows=2), raw=TRUE) #' solr_parse(out, "df") #' } solr_mlt <- function(conn, name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) { conn$mlt(name = name, params = params, body = body, callopts = callopts, raw = raw, parsetype = parsetype, concat = concat, optimizeMaxRows = optimizeMaxRows, minOptimizedRows = minOptimizedRows, ...) } solrium/R/collection_balanceshardunique.R0000644000176200001440000000332513176211220020337 0ustar liggesusers#' @title Balance a property #' #' @description Insures that a particular property is distributed evenly #' amongst the physical nodes that make up a collection. If the property #' already exists on a replica, every effort is made to leave it there. If the #' property is not on any replica on a shard one is chosen and the property #' is added. #' #' @export #' @inheritParams collection_create #' @param property (character) Required. The property to balance. The literal #' "property." is prepended to this property if not specified explicitly. #' @param onlyactivenodes (logical) Normally, the property is instantiated #' on active nodes only. If `FALSE`, then inactive nodes are also included #' for distribution. Default: `TRUE` #' @param shardUnique (logical) Something of a safety valve. There is one #' pre-defined property (preferredLeader) that defaults this value to `TRUE`. 
#' For all other properties that are balanced, this must be set to `TRUE` or #' an error message is returned #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("addrep")) { #' conn$collection_create(name = "mycollection") #' # OR: bin/solr create -c mycollection #' } #' #' # balance preferredLeader property #' conn$collection_balanceshardunique("mycollection", property = "preferredLeader") #' #' # examine cluster status #' conn$collection_clusterstatus()$cluster$collections$mycollection #' } collection_balanceshardunique <- function(conn, name, property, onlyactivenodes = TRUE, shardUnique = NULL, raw = FALSE, ...) { conn$collection_balanceshardunique(name, property, onlyactivenodes, shardUnique, raw, ...) } solrium/R/collection_createalias.R0000644000176200001440000000210713176257442016773 0ustar liggesusers#' @title Create an alias for a collection #' #' @description Create a new alias pointing to one or more collections. If an #' alias by the same name already exists, this action will replace the existing #' alias, effectively acting like an atomic "MOVE" command. #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param alias (character) Required. The alias name to be created #' @param collections (character) Required. A character vector of collections #' to be aliased #' @param raw (logical) If \code{TRUE}, returns raw data #' @param callopts curl options passed on to \code{\link[crul]{HttpClient}} #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' if (!conn$collection_exists("thingsstuff")) { #' conn$collection_create(name = "thingsstuff") #' } #' #' conn$collection_createalias("tstuff", "thingsstuff") #' conn$collection_clusterstatus()$cluster$collections$thingsstuff$aliases #' } collection_createalias <- function(conn, alias, collections, raw = FALSE, callopts = list()) { conn$collection_createalias(alias, collections, raw, callopts) } solrium/R/config_get.R0000644000176200001440000000451013167507346014410 0ustar liggesusers#' Get Solr configuration details #' #' @export #' #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core. If not given, all cores. #' @param what (character) What you want to look at. One of solrconfig or #' schema. Default: solrconfig #' @param wt (character) One of json (default) or xml. Data type returned. #' If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses #' \code{\link[xml2]{read_xml}} to parse. #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' \code{wt} #' @param ... curl options passed on to [crul::HttpClient] #' @return A list, \code{xml_document}, or character #' @details Note that if \code{raw=TRUE}, \code{what} is ignored. That is, #' you get all the data when \code{raw=TRUE}. 
#' @examples \dontrun{ #' # start Solr with Cloud mode via the schemaless eg: bin/solr -e cloud #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # all config settings #' conn$config_get("gettingstarted") #' #' # just znodeVersion #' conn$config_get("gettingstarted", "znodeVersion") #' #' # just znodeVersion #' conn$config_get("gettingstarted", "luceneMatchVersion") #' #' # just updateHandler #' conn$config_get("gettingstarted", "updateHandler") #' #' # just updateHandler #' conn$config_get("gettingstarted", "requestHandler") #' #' ## Get XML #' conn$config_get("gettingstarted", wt = "xml") #' conn$config_get("gettingstarted", "updateHandler", wt = "xml") #' conn$config_get("gettingstarted", "requestHandler", wt = "xml") #' #' ## Raw data - what param ignored when raw=TRUE #' conn$config_get("gettingstarted", raw = TRUE) #' } config_get <- function(conn, name, what = NULL, wt = "json", raw = FALSE, ...) { conn$config_get(name, what, wt, raw, ...) } config_parse <- function(x, what = NULL, wt, raw) { if (raw) { return(x) } else { switch( wt, json = { tt <- jsonlite::fromJSON(x) if (is.null(what)) { tt } else { tt$config[what] } }, xml = { tt <- xml2::read_xml(x) if (is.null(what)) { tt } else { xml2::xml_find_all(tt, sprintf('//lst[@name="%s"]', what)) } } ) } } solrium/R/collection_requeststatus.R0000644000176200001440000000121313176242356017445 0ustar liggesusers#' @title Get request status #' #' @description Request the status of an already submitted Asynchronous #' Collection API call. This call is also used to clear up the stored statuses. #' #' @export #' @param requestid (character) Required. The user defined request-id for the #' request. This can be used to track the status of the submitted asynchronous #' task. `-1` is a special request id which is used to cleanup the stored #' states for all of the already completed/failed tasks. #' @inheritParams collection_create collection_requeststatus <- function(conn, requestid, raw = FALSE, ...) { conn$collection_requeststatus(requestid, raw, ...) } solrium/R/classes.r0000644000176200001440000000052112252251410013755 0ustar liggesusers#' Test for sr_facet class #' @export #' @param x Input #' @rdname is-sr is.sr_facet <- function(x) inherits(x, "sr_facet") #' Test for sr_high class #' @export #' @rdname is-sr is.sr_high <- function(x) inherits(x, "sr_high") #' Test for sr_search class #' @export #' @rdname is-sr is.sr_search <- function(x) inherits(x, "sr_search")solrium/R/core_reload.R0000644000176200001440000000111613167507346014561 0ustar liggesusers#' Reload a core #' #' @export #' #' @inheritParams core_create #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # Status of particular cores #' conn$core_reload("gettingstarted") #' conn$core_status("gettingstarted") #' } core_reload <- function(conn, name, raw = FALSE, callopts=list()) { conn$core_reload(name, raw, callopts) } solrium/R/SolrClient.R0000644000176200001440000012451313176437407014370 0ustar liggesusers#' Solr connection client #' #' @export #' @param host (character) Host url. Deafault: 127.0.0.1 #' @param path (character) url path. #' @param port (character/numeric) Port. 
Default: 8389 #' @param scheme (character) http scheme, one of http or https. Default: http #' @param proxy List of arguments for a proxy connection, including one or #' more of: url, port, username, password, and auth. See #' [crul::proxy] for help, which is used to construct the #' proxy connection. #' @param errors (character) One of `"simple"` or `"complete"`. Simple gives #' http code and error message on an error, while complete gives both http #' code and error message, and stack trace, if available. #' #' @return Various output, see help files for each grouping of methods. #' #' @details `SolrClient` creates a R6 class object. The object is #' not cloneable and is portable, so it can be inherited across packages #' without complication. #' #' `SolrClient` is used to initialize a client that knows about your #' Solr instance, with options for setting host, port, http scheme, #' and simple vs. complete error reporting #' #' @section SolrClient methods: #' #' Each of these methods also has a matching standalone exported #' function that you can use by passing in the connection object made #' by calling `SolrClient$new()`. Also, see the docs for each method for #' parameter definitions and their default values. #' #' * `ping(name, wt = 'json', raw = FALSE, ...)` #' * `schema(name, what = '', raw = FALSE, ...)` #' * `commit(name, expunge_deletes = FALSE, wait_searcher = TRUE, #' soft_commit = FALSE, wt = 'json', raw = FALSE, ...)` #' * `optimize(name, max_segments = 1, wait_searcher = TRUE, #' soft_commit = FALSE, wt = 'json', raw = FALSE, ...)` #' * `config_get(name, what = NULL, wt = "json", raw = FALSE, ...)` #' * `config_params(name, param = NULL, set = NULL, unset = NULL, #' update = NULL, ...)` #' * `config_overlay(name, omitHeader = FALSE, ...)` #' * `config_set(name, set = NULL, unset = NULL, ...)` #' * `collection_exists(name, ...)` #' * `collection_list(raw = FALSE, ...)` #' * `collection_create(name, numShards = 1, maxShardsPerNode = 1, #' createNodeSet = NULL, collection.configName = NULL, replicationFactor = 1, #' router.name = NULL, shards = NULL, createNodeSet.shuffle = TRUE, #' router.field = NULL, autoAddReplicas = FALSE, async = NULL, raw = FALSE, #' callopts=list(), ...)` #' * `collection_addreplica(name, shard = NULL, route = NULL, node = NULL, #' instanceDir = NULL, dataDir = NULL, async = NULL, raw = FALSE, #' callopts=list(), ...)` #' * `collection_addreplicaprop(name, shard, replica, property, property.value, #' shardUnique = FALSE, raw = FALSE, callopts=list())` #' * `collection_addrole(role = "overseer", node, raw = FALSE, ...)` #' * `collection_balanceshardunique(name, property, onlyactivenodes = TRUE, #' shardUnique = NULL, raw = FALSE, ...)` #' * `collection_clusterprop(name, val, raw = FALSE, callopts=list())` #' * `collection_clusterstatus(name = NULL, shard = NULL, raw = FALSE, ...)` #' * `collection_createalias(alias, collections, raw = FALSE, ...)` #' * `collection_createshard(name, shard, createNodeSet = NULL, #' raw = FALSE, ...)` #' * `collection_delete(name, raw = FALSE, ...)` #' * `collection_deletealias(alias, raw = FALSE, ...)` #' * `collection_deletereplica(name, shard = NULL, replica = NULL, #' onlyIfDown = FALSE, raw = FALSE, callopts=list(), ...)` #' * `collection_deletereplicaprop(name, shard, replica, property, raw = FALSE, #' callopts=list())` #' * `collection_deleteshard(name, shard, raw = FALSE, ...)` #' * `collection_migrate(name, target.collection, split.key, #' forward.timeout = NULL, async = NULL, raw = FALSE, ...)` #' * 
`collection_overseerstatus(raw = FALSE, ...)` #' * `collection_rebalanceleaders(name, maxAtOnce = NULL, maxWaitSeconds = NULL, #' raw = FALSE, ...)` #' * `collection_reload(name, raw = FALSE, ...)` #' * `collection_removerole(role = "overseer", node, raw = FALSE, ...)` #' * `collection_requeststatus(requestid, raw = FALSE, ...)` #' * `collection_splitshard(name, shard, ranges = NULL, split.key = NULL, #' async = NULL, raw = FALSE, ...)` #' * `core_status(name = NULL, indexInfo = TRUE, raw = FALSE, callopts=list())` #' * `core_exists(name, callopts = list())` #' * `core_create(name, instanceDir = NULL, config = NULL, schema = NULL, #' dataDir = NULL, configSet = NULL, collection = NULL, shard = NULL, #' async=NULL, raw = FALSE, callopts=list(), ...)` #' * `core_unload(name, deleteIndex = FALSE, deleteDataDir = FALSE, #' deleteInstanceDir = FALSE, async = NULL, raw = FALSE, callopts = list())` #' * `core_rename(name, other, async = NULL, raw = FALSE, callopts=list())` #' * `core_reload(name, raw = FALSE, callopts=list())` #' * `core_swap(name, other, async = NULL, raw = FALSE, callopts=list())` #' * `core_mergeindexes(name, indexDir = NULL, srcCore = NULL, async = NULL, #' raw = FALSE, callopts = list())` #' * `core_requeststatus(requestid, raw = FALSE, callopts = list())` #' * `core_split(name, path = NULL, targetCore = NULL, ranges = NULL, #' split.key = NULL, async = NULL, raw = FALSE, callopts=list())` #' * `search(name = NULL, params = NULL, body = NULL, callopts = list(), #' raw = FALSE, parsetype = 'df', concat = ',', optimizeMaxRows = TRUE, #' minOptimizedRows = 50000L, ...)` #' * `facet(name = NULL, params = NULL, body = NULL, callopts = list(), #' raw = FALSE, parsetype = 'df', concat = ',', ...)` #' * `stats(name = NULL, params = list(q = '*:*', stats.field = NULL, #' stats.facet = NULL), body = NULL, callopts=list(), raw = FALSE, #' parsetype = 'df', ...)` #' * `highlight(name = NULL, params = NULL, body = NULL, callopts=list(), #' raw = FALSE, parsetype = 'df', ...)` #' * `group(name = NULL, params = NULL, body = NULL, callopts=list(), #' raw=FALSE, parsetype='df', concat=',', ...)` #' * `mlt(name = NULL, params = NULL, body = NULL, callopts=list(), #' raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, #' minOptimizedRows = 50000L, ...)` #' * `all(name = NULL, params = NULL, body = NULL, callopts=list(), #' raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, #' minOptimizedRows = 50000L, ...)` #' * `get(ids, name, fl = NULL, wt = 'json', raw = FALSE, ...)` #' * `add(x, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, #' boost = NULL, wt = 'json', raw = FALSE, ...)` #' * `delete_by_id(ids, name, commit = TRUE, commit_within = NULL, #' overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...)` #' * `delete_by_query(query, name, commit = TRUE, commit_within = NULL, #' overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...)` #' * `update_json(files, name, commit = TRUE, optimize = FALSE, #' max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, #' soft_commit = FALSE, prepare_commit = NULL, wt = 'json', raw = FALSE, ...)` #' * `update_xml(files, name, commit = TRUE, optimize = FALSE, max_segments = 1, #' expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, #' prepare_commit = NULL, wt = 'json', raw = FALSE, ...)` #' * `update_csv(files, name, separator = ',', header = TRUE, fieldnames = NULL, #' skip = NULL, skipLines = 0, trim = FALSE, encapsulator = NULL, #' escape = NULL, keepEmpty = FALSE, literal = NULL, 
map = NULL, split = NULL, #' rowid = NULL, rowidOffset = NULL, overwrite = NULL, commit = NULL, #' wt = 'json', raw = FALSE, ...)` #' * `update_atomic_json(body, name, wt = 'json', raw = FALSE, ...)` #' * `update_atomic_xml(body, name, wt = 'json', raw = FALSE, ...)` #' #' @format NULL #' @usage NULL #' #' @examples \dontrun{ #' # make a client #' (cli <- SolrClient$new()) #' #' # variables #' cli$host #' cli$port #' cli$path #' cli$scheme #' #' # ping #' ## ping to make sure it's up #' cli$ping("gettingstarted") #' #' # version #' ## get Solr version information #' cli$schema("gettingstarted") #' cli$schema("gettingstarted", "fields") #' cli$schema("gettingstarted", "name") #' cli$schema("gettingstarted", "version")$version #' #' # Search #' cli$search("gettingstarted", params = list(q = "*:*")) #' cli$search("gettingstarted", body = list(query = "*:*")) #' #' # set a different host #' SolrClient$new(host = 'stuff.com') #' #' # set a different port #' SolrClient$new(host = 3456) #' #' # set a different http scheme #' SolrClient$new(scheme = 'https') #' #' # set a proxy #' SolrClient$new(proxy = list(url = "187.62.207.130:3128")) #' #' prox <- list(url = "187.62.207.130:3128", user = "foo", pwd = "bar") #' cli <- SolrClient$new(proxy = prox) #' cli$proxy #' #' # A remote Solr instance to which you don't have admin access #' (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' cli$search(params = list(q = "memory")) #' } SolrClient <- R6::R6Class( "SolrClient", portable = TRUE, cloneable = FALSE, public = list( host = "127.0.0.1", port = 8983, path = NULL, scheme = 'http', proxy = NULL, errors = "simple", initialize = function(host, path, port, scheme, proxy, errors) { if (!missing(host)) self$host <- host if (!missing(path)) self$path <- path if (!missing(port)) self$port <- port if (!missing(scheme)) self$scheme <- scheme if (!missing(proxy)) self$proxy <- private$make_proxy(proxy) if (!missing(errors)) self$errors <- private$lint_errors(errors) }, print = function(...) { cat('', sep = "\n") cat(paste0(' host: ', self$host), sep = "\n") cat(paste0(' path: ', self$path), sep = "\n") cat(paste0(' port: ', self$port), sep = "\n") cat(paste0(' scheme: ', self$scheme), sep = "\n") cat(paste0(' errors: ', self$errors), sep = "\n") cat(" proxy:", sep = "\n") if (is.null(self$proxy)) { } else { cat(paste0(" url: ", self$proxy$proxy), sep = "\n") cat(paste0(" port: ", self$proxy$proxyport)) } }, # Admin methods ping = function(name, wt = 'json', raw = FALSE, ...) { path <- sprintf('solr/%s/admin/ping', name) res <- tryCatch( solr_GET(self$make_url(), path = path, args = list(wt = wt), callopts = list(...)), error = function(e) e ) if (inherits(res, "error")) { return(list(status = "not found")) } else { out <- structure(res, class = "ping", wt = wt) if (raw) return( out ) solr_parse(out) } }, schema = function(name, what = '', raw = FALSE, ...) { res <- solr_GET(self$make_url(), sprintf('solr/%s/schema/%s', name, what), list(wt = "json"), ...) if (raw) return(res) jsonlite::fromJSON(res) }, commit = function(name, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, wt = 'json', raw = FALSE, ...) { obj_proc(self$make_url(), sprintf('solr/%s/update', name), body = list(commit = list(expungeDeletes = asl(expunge_deletes), waitSearcher = asl(wait_searcher), softCommit = asl(soft_commit))), args = list(wt = wt), raw = raw, self$proxy, ...) }, optimize = function(name, max_segments = 1, wait_searcher = TRUE, soft_commit = FALSE, wt = 'json', raw = FALSE, ...) 
{ obj_proc(self$make_url(), sprintf('solr/%s/update', name), body = list(optimize = list(maxSegments = max_segments, waitSearcher = asl(wait_searcher), softCommit = asl(soft_commit))), args = list(wt = wt), raw = raw, self$proxy, ...) }, config_get = function(name, what = NULL, wt = "json", raw = FALSE, ...) { res <- solr_GET(self$make_url(), sprintf('solr/%s/config', name), sc(list(wt = wt)), self$proxy, ...) config_parse(res, what, wt, raw) }, config_params = function(name, param = NULL, set = NULL, unset = NULL, update = NULL, ...) { if (all(vapply(list(set, unset, update), is.null, logical(1)))) { if (is.null(param)) { url <- sprintf('solr/%s/config/params', name) } else { url <- sprintf('solr/%s/config/params/%s', name, param) } res <- solr_GET(self$make_url(), sprintf('solr/%s/config/params/%s', name, param), list(wt = "json"), list(...), self$proxy) } else { path <- sprintf('solr/%s/config/params', name) body <- sc(c(name_by(unbox_if(set, TRUE), "set"), name_by(unbox_if(unset, TRUE), "unset"), name_by(unbox_if(update, TRUE), "update"))) res <- solr_POST_body(self$make_url(), path, body, list(wt = "json"), ctype_json(), list(...), self$proxy) } jsonlite::fromJSON(res) }, config_overlay = function(name, omitHeader = FALSE, ...) { args <- sc(list(wt = "json", omitHeader = asl(omitHeader))) res <- solr_GET(self$make_url(), sprintf('solr/%s/config/overlay', name), args, self$proxy, ...) jsonlite::fromJSON(res) }, config_set = function(name, set = NULL, unset = NULL, ...) { body <- sc(list(`set-property` = unbox_if(set), `unset-property` = unset)) res <- solr_POST_body(self$make_url(), sprintf('solr/%s/config', name), body, list(wt = "json"), ctype_json(), list(...), self$proxy) jsonlite::fromJSON(res) }, # Collection methods collection_exists = function(name, ...) 
{ name %in% suppressMessages(self$collection_list(...))$collections }, collection_list = function(raw = FALSE, callopts = list()) { private$coll_h(sc(list(action = 'LIST', wt = 'json')), callopts, raw) }, collection_create = function(name, numShards = 1, maxShardsPerNode = 1, createNodeSet = NULL, collection.configName = NULL, replicationFactor = 1, router.name = NULL, shards = NULL, createNodeSet.shuffle = TRUE, router.field = NULL, autoAddReplicas = FALSE, async = NULL, raw = FALSE, callopts=list()) { args <- sc(list(action = 'CREATE', name = name, numShards = numShards, replicationFactor = replicationFactor, maxShardsPerNode = maxShardsPerNode, createNodeSet = createNodeSet, collection.configName = collection.configName, router.name = router.name, shards = shards, createNodeSet.shuffle = asl(createNodeSet.shuffle), router.field = router.field, autoAddReplicas = asl(autoAddReplicas), async = async, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_addreplica = function(name, shard = NULL, route = NULL, node = NULL, instanceDir = NULL, dataDir = NULL, async = NULL, raw = FALSE, callopts=list()) { args <- sc(list(action = 'ADDREPLICA', collection = name, shard = shard, route = route, node = node, instanceDir = instanceDir, dataDir = dataDir, async = async, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_addreplicaprop = function(name, shard, replica, property, property.value, shardUnique = FALSE, raw = FALSE, callopts=list()) { args <- sc(list(action = 'ADDREPLICAPROP', collection = name, shard = shard, replica = replica, property = property, property.value = property.value, shardUnique = asl(shardUnique), wt = 'json')) private$coll_h(args, callopts, raw) }, collection_addrole = function(role = "overseer", node, raw = FALSE, callopts = list(), ...) 
{ args <- sc(list(action = 'ADDROLE', role = role, node = node, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_balanceshardunique = function(name, property, onlyactivenodes = TRUE, shardUnique = NULL, raw = FALSE, callopts = list()) { args <- sc(list(action = 'BALANCESHARDUNIQUE', collection = name, property = property, onlyactivenodes = asl(onlyactivenodes), shardUnique = asl(shardUnique), wt = 'json')) private$coll_h(args, callopts, raw) }, collection_clusterprop = function(name, val, raw = FALSE, callopts=list()) { args <- sc(list(action = 'CLUSTERPROP', name = name, val = if (is.null(val)) "" else val, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_clusterstatus = function(name = NULL, shard = NULL, raw = FALSE, callopts = list()) { shard <- check_shard(shard) args <- sc(list(action = 'CLUSTERSTATUS', collection = name, shard = shard, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_createalias = function(alias, collections, raw = FALSE, callopts = list()) { collections <- check_shard(collections) args <- sc(list(action = 'CREATEALIAS', name = alias, collections = collections, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_createshard = function(name, shard, createNodeSet = NULL, raw = FALSE, callopts = list()) { args <- sc(list(action = 'CREATESHARD', collection = name, shard = shard, createNodeSet = createNodeSet, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_delete = function(name, raw = FALSE, callopts = list()) { args <- sc(list(action = 'DELETE', name = name, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_deletealias = function(alias, raw = FALSE, callopts = list()) { args <- sc(list(action = 'DELETEALIAS', name = alias, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_deletereplica = function(name, shard = NULL, replica = NULL, onlyIfDown = FALSE, raw = FALSE, callopts=list(), ...) 
{ args <- sc(list(action = 'DELETEREPLICA', collection = name, shard = shard, replica = replica, onlyIfDown = asl(onlyIfDown), wt = 'json')) private$coll_h(args, callopts, raw) }, collection_deletereplicaprop = function(name, shard, replica, property, raw = FALSE, callopts=list()) { args <- sc(list(action = 'DELETEREPLICAPROP', collection = name, shard = shard, replica = replica, property = property, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_deleteshard = function(name, shard, raw = FALSE, callopts = list()) { args <- sc(list(action = 'DELETESHARD', collection = name, shard = shard, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_migrate = function(name, target.collection, split.key, forward.timeout = NULL, async = NULL, raw = FALSE, callopts = list()) { args <- sc(list(action = 'MIGRATE', collection = name, target.collection = target.collection, split.key = split.key, forward.timeout = forward.timeout, async = async, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_overseerstatus = function(raw = FALSE, callopts = list()) { args <- sc(list(action = 'OVERSEERSTATUS', wt = 'json')) private$coll_h(args, callopts, raw) }, collection_rebalanceleaders = function(name, maxAtOnce = NULL, maxWaitSeconds = NULL, raw = FALSE, callopts = list()) { args <- sc(list(action = 'REBALANCELEADERS', collection = name, maxAtOnce = maxAtOnce, maxWaitSeconds = maxWaitSeconds, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_reload = function(name, raw = FALSE, callopts = list()) { args <- sc(list(action = 'RELOAD', name = name, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_removerole = function(role = "overseer", node, raw = FALSE, callopts = list()) { args <- sc(list(action = 'REMOVEROLE', role = role, node = node, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_requeststatus = function(requestid, raw = FALSE, callopts = list()) { args <- sc(list(action = 'REQUESTSTATUS', requestid = requestid, wt = 'json')) private$coll_h(args, callopts, raw) }, collection_splitshard = function(name, shard, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, callopts = list()) { args <- sc(list(action = 'SPLITSHARD', collection = name, shard = shard, ranges = do_ranges(ranges), split.key = split.key, async = async, wt = 'json')) private$coll_h(args, callopts, raw) }, # Core methods core_status = function(name = NULL, indexInfo = TRUE, raw = FALSE, callopts=list()) { args <- sc(list(action = 'STATUS', core = name, indexInfo = asl(indexInfo), wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_exists = function(name, callopts = list()) { tmp <- suppressMessages(self$core_status(name = name, callopts = callopts)) length(tmp$status[[1]]) > 0 }, core_create = function(name, instanceDir = NULL, config = NULL, schema = NULL, dataDir = NULL, configSet = NULL, collection = NULL, shard = NULL, async=NULL, raw = FALSE, callopts=list(), ...) 
{ args <- sc(list(action = 'CREATE', name = name, instanceDir = instanceDir, config = config, schema = schema, dataDir = dataDir, configSet = configSet, collection = collection, shard = shard, async = async, wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_unload = function(name, deleteIndex = FALSE, deleteDataDir = FALSE, deleteInstanceDir = FALSE, async = NULL, raw = FALSE, callopts = list()) { args <- sc(list(action = 'UNLOAD', core = name, deleteIndex = asl(deleteIndex), deleteDataDir = asl(deleteDataDir), deleteInstanceDir = asl(deleteInstanceDir), async = async, wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_rename = function(name, other, async = NULL, raw = FALSE, callopts=list()) { args <- sc(list(action = 'RENAME', core = name, other = other, async = async, wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_reload = function(name, raw = FALSE, callopts=list()) { args <- sc(list(action = 'RELOAD', core = name, wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_swap = function(name, other, async = NULL, raw = FALSE, callopts=list()) { if (is_in_cloud_mode(self)) stop("You are in SolrCloud mode, stopping", call. = FALSE) args <- sc(list(action = 'SWAP', core = name, other = other, async = async, wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_mergeindexes = function(name, indexDir = NULL, srcCore = NULL, async = NULL, raw = FALSE, callopts = list()) { args <- sc(list(action = 'MERGEINDEXES', core = name, indexDir = indexDir, srcCore = srcCore, async = async, wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_requeststatus = function(requestid, raw = FALSE, callopts = list()) { args <- sc(list(action = 'REQUESTSTATUS', requestid = requestid, wt = 'json')) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, core_split = function(name, path = NULL, targetCore = NULL, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, callopts=list()) { args <- sc(list(action = 'SPLIT', core = name, ranges = do_ranges(ranges), split.key = split.key, async = async, wt = 'json')) args <- c(args, make_args(path), make_args(targetCore)) res <- solr_GET(self$make_url(), 'solr/admin/cores', args, callopts, self$proxy) if (raw) res else jsonlite::fromJSON(res) }, # Search methods search = function(name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = 'df', concat = ',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) { if (is.null(params)) { if (is.null(body)) stop("if 'params' NULL, body must be given") } stopifnot(inherits(params, "list") || is.null(params)) stopifnot(inherits(body, "list") || is.null(body)) if (!is.null(params) && length(params) > 0) { params$rows <- private$adjust_rows(params, optimizeMaxRows, minOptimizedRows, name) } if (!is.null(body) && length(body) > 0) { body$rows <- private$adjust_rows(body, optimizeMaxRows, minOptimizedRows, name) } if (!is.null(params)) params <- check_args_search(params, "fq", ...) 
if (!is.null(body)) body <- check_args_search(body, "fq", ...) if (!is.null(body)) { res <- solr_POST_body(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, body, params, ctype_json(), callopts, self$proxy) out <- structure(res, class = "sr_search", wt = params$wt) } else { res <- solr_GET(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, params, callopts, self$proxy) out <- structure(res, class = "sr_search", wt = params$wt) } if (raw) { return( out ) } else { parsed <- cont_parse(out, params$wt %||% body$wt %||% "json") parsed <- structure(parsed, class = c(class(parsed), "sr_search")) solr_parse(parsed, parsetype, concat) } }, facet = function(name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = 'df', concat = ',', ...) { if (is.null(params)) { if (is.null(body)) stop("if 'params' NULL, body must be given") } stopifnot(inherits(params, "list") || is.null(params)) stopifnot(inherits(body, "list") || is.null(body)) if (!is.null(params)) params <- check_args_facet(params, keys_facet, ...) if (!is.null(body)) body <- check_args_facet(body, keys_facet, ...) if (!is.null(body)) { res <- solr_POST_body(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, body, params, ctype_json(), callopts, self$proxy) out <- structure(res, class = "sr_facet", wt = params$wt) } else { res <- solr_GET(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, params, callopts, self$proxy) out <- structure(res, class = "sr_facet", wt = params$wt) } if (raw) { return( out ) } else { parsed <- cont_parse(out, params$wt %||% body$wt %||% "json") parsed <- structure(parsed, class = c(class(parsed), "sr_facet")) solr_parse(parsed) } }, stats = function(name = NULL, params = list(q = '*:*', stats.field = NULL, stats.facet = NULL), body = NULL, callopts=list(), raw = FALSE, parsetype = 'df', ...) { if (is.null(params)) { if (is.null(body)) stop("if 'params' NULL, body must be given") } stopifnot(inherits(params, "list") || is.null(body)) stopifnot(inherits(body, "list") || is.null(body)) if (!is.null(params)) params <- check_args_stats(params, keys_stats, ...) if (!is.null(body)) body <- check_args_stats(body, keys_stats, ...) if (!is.null(body)) { res <- solr_POST_body(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, body, params, ctype_json(), callopts, self$proxy) out <- structure(res, class = "sr_stats", wt = params$wt) } else { res <- solr_GET(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, params, callopts, self$proxy) out <- structure(res, class = "sr_stats", wt = params$wt) } if (raw) { return( out ) } else { parsed <- cont_parse(out, params$wt %||% body$wt %||% "json") parsed <- structure(parsed, class = c(class(parsed), "sr_stats")) solr_parse(out, parsetype) } }, highlight = function(name = NULL, params = NULL, body = NULL, callopts=list(), raw = FALSE, parsetype = 'df', ...) { if (is.null(params)) { if (is.null(body)) stop("if 'params' NULL, body must be given") } stopifnot(inherits(params, "list") || is.null(body)) stopifnot(inherits(body, "list") || is.null(body)) if (!is.null(params)) params <- check_args_high(params, keys_high, ...) if (!is.null(body)) body <- check_args_high(body, keys_high, ...) 
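      # check_args_high() has already set hl=true and validated the keys;
      # as in the other verbs, a non-NULL 'body' triggers a POST request,
      # otherwise a GET with query parameters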
if (!is.null(body)) { res <- solr_POST_body(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, body, params, callopts, self$proxy) out <- structure(res, class = "sr_high", wt = params$wt) } else { res <- solr_GET(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, params, callopts, self$proxy) out <- structure(res, class = "sr_high", wt = params$wt) } if (raw) { return(out) } else { parsed <- cont_parse(out, params$wt %||% body$wt %||% "json") parsed <- structure(parsed, class = c(class(parsed), "sr_high")) solr_parse(out, parsetype) } }, group = function(name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', ...) { if (is.null(params)) { if (is.null(body)) stop("if 'params' NULL, body must be given") } stopifnot(inherits(params, "list") || is.null(params)) stopifnot(inherits(body, "list") || is.null(body)) if (!is.null(params)) params <- check_args_group(params, keys_group, ...) if (!is.null(body)) body <- check_args_group(body, keys_group, ...) if (!is.null(body)) { res <- solr_POST_body( self$make_url(), if (!is.null(name)) url_handle(name) else self$path, body, params, ctype_json(), callopts, self$proxy) out <- structure(res, class = "sr_group", wt = body$wt) } else { res <- solr_GET(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, params, callopts, self$proxy) out <- structure(res, class = "sr_group", wt = params$wt) } if (raw) { return(out) } else { parsed <- cont_parse(out, params$wt %||% body$wt %||% "json") parsed <- structure(parsed, class = c(class(parsed), "sr_group")) solr_parse(out, parsetype) } }, mlt = function(name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) { if (is.null(params)) { if (is.null(body)) stop("if 'params' NULL, body must be given") } stopifnot(inherits(params, "list") || is.null(params)) stopifnot(inherits(body, "list") || is.null(body)) if (!is.null(params) && length(params) > 0) { params$rows <- private$adjust_rows(params, optimizeMaxRows, minOptimizedRows, name) } if (!is.null(body) && length(body) > 0) { body$rows <- private$adjust_rows(body, optimizeMaxRows, minOptimizedRows, name) } if (!is.null(params)) params <- check_args_mlt(params, keys_mlt, ...) if (!is.null(body)) body <- check_args_mlt(body, keys_mlt, ...) if (!is.null(body)) { res <- solr_POST_body( self$make_url(), if (!is.null(name)) url_handle(name) else self$path, body, params, ctype_json(), callopts, self$proxy) out <- structure(res, class = "sr_mlt", wt = body$wt) } else { res <- solr_GET(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, params, callopts, self$proxy) out <- structure(res, class = "sr_mlt", wt = params$wt) } if (raw) { return( out ) } else { parsed <- cont_parse(out, params$wt %||% body$wt %||% "json") parsed <- structure(parsed, class = c(class(parsed), "sr_mlt")) solr_parse(parsed, parsetype, concat) } }, all = function(name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) 
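    # combined query: arguments are checked against keys_all, results are
    # tagged with class "sr_all" so solr_parse() can return the different
    # response sections together, and rows are capped via adjust_rows() just
    # as in search()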
{ if (is.null(params)) { if (is.null(body)) stop("if 'params' NULL, body must be given") } stopifnot(inherits(params, "list") || is.null(params)) stopifnot(inherits(body, "list") || is.null(body)) if (!is.null(params) && length(params) > 0) { params$rows <- private$adjust_rows(params, optimizeMaxRows, minOptimizedRows, name) } if (!is.null(body) && length(body) > 0) { body$rows <- private$adjust_rows(body, optimizeMaxRows, minOptimizedRows, name) } if (!is.null(params)) params <- check_args_search(params, keys_all, ...) if (!is.null(body)) body <- check_args_search(body, keys_all, ...) if (!is.null(body)) { res <- solr_POST_body( self$make_url(), if (!is.null(name)) url_handle(name) else self$path, body, params, ctype_json(), callopts, self$proxy) out <- structure(res, class = "sr_all", wt = body$wt) } else { res <- solr_GET(self$make_url(), if (!is.null(name)) url_handle(name) else self$path, params, callopts, self$proxy) out <- structure(res, class = "sr_all", wt = params$wt) } if (raw) { return( out ) } else { parsed <- cont_parse(out, params$wt %||% body$wt %||% "json") parsed <- structure(parsed, class = c(class(parsed), "sr_all")) solr_parse(parsed, parsetype, concat) } }, # documents methods get = function(ids, name, fl = NULL, wt = 'json', raw = FALSE, ...) { if (!is.null(fl)) fl <- paste0(fl, collapse = ",") args <- sc(list(ids = paste0(ids, collapse = ","), fl = fl, wt = wt)) res <- solr_GET(self$make_url(), sprintf('solr/%s/get', name), args, self$proxy, ...) config_parse(res, wt = wt, raw = raw) }, add = function(x, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { args <- sc(list(commit = asl(commit), commitWithin = commit_within, overwrite = asl(overwrite), wt = wt)) obj_proc(self$make_url(), sprintf('solr/%s/update/json/docs', name), x, args, raw, self$proxy, ...) }, delete_by_id = function(ids, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { args <- sc(list(commit = asl(commit), wt = wt)) body <- list(delete = lapply(ids, function(z) list(id = z))) obj_proc(self$make_url(), sprintf('solr/%s/update/json', name), body, args, raw, self$proxy, ...) }, delete_by_query = function(query, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { args <- sc(list(commit = asl(commit), wt = wt)) body <- list(delete = list(query = query)) obj_proc(self$make_url(), sprintf('solr/%s/update/json', name), body, args, raw, self$proxy, ...) }, update_json = function(files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = 'json', raw = FALSE, ...) { private$stop_if_absent(name) args <- sc(list(commit = asl(commit), optimize = asl(optimize), maxSegments = max_segments, expungeDeletes = asl(expunge_deletes), waitSearcher = asl(wait_searcher), softCommit = asl(soft_commit), prepareCommit = prepare_commit, wt = wt)) docreate(self$make_url(), sprintf('solr/%s/update/json/docs', name), crul::upload(files), args, ctype_json(), raw, self$proxy, ...) }, update_xml = function(files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = 'json', raw = FALSE, ...) 
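    # POSTs a single XML file to solr/<name>/update with an XML content-type;
    # the target core/collection must already exist (see stop_if_absent)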
{ private$stop_if_absent(name) args <- sc( list(commit = asl(commit), optimize = asl(optimize), maxSegments = max_segments, expungeDeletes = asl(expunge_deletes), waitSearcher = asl(wait_searcher), softCommit = asl(soft_commit), prepareCommit = prepare_commit, wt = wt)) docreate(self$make_url(), sprintf('solr/%s/update', name), crul::upload(files), args, ctype_xml(), raw, self$proxy, ...) }, update_csv = function(files, name, separator = ',', header = TRUE, fieldnames = NULL, skip = NULL, skipLines = 0, trim = FALSE, encapsulator = NULL, escape = NULL, keepEmpty = FALSE, literal = NULL, map = NULL, split = NULL, rowid = NULL, rowidOffset = NULL, overwrite = NULL, commit = NULL, wt = 'json', raw = FALSE, ...) { private$stop_if_absent(name) if (!is.null(fieldnames)) fieldnames <- paste0(fieldnames, collapse = ",") args <- sc( list(separator = separator, header = header, fieldnames = fieldnames, skip = skip, skipLines = skipLines, trim = trim, encapsulator = encapsulator, escape = escape, keepEmpty = keepEmpty, literal = literal, map = map, split = split, rowid = rowid, rowidOffset = rowidOffset, overwrite = overwrite, commit = commit, wt = wt)) docreate(self$make_url(), sprintf('solr/%s/update/csv', name), crul::upload(files), args, ctype_csv(), raw, self$proxy, ...) }, update_atomic_json = function(body, name, wt = 'json', raw = FALSE, ...) { private$stop_if_absent(name) doatomiccreate(self$make_url(), sprintf('solr/%s/update', name), body, list(wt = wt), "json", raw, self$proxy, ...) }, update_atomic_xml = function(body, name, wt = 'json', raw = FALSE, ...) { private$stop_if_absent(name) doatomiccreate(self$make_url(), sprintf('solr/%s/update', name), body, list(wt = wt), "xml", raw, self$proxy, ...) }, # utility functions make_url = function() { if (is.null(self$port)) { #sprintf("%s://%s/%s", self$scheme, self$host, self$path) sprintf("%s://%s", self$scheme, self$host) } else { #sprintf("%s://%s:%s/%s", self$scheme, self$host, self$port, self$path) sprintf("%s://%s:%s", self$scheme, self$host, self$port) } } ), private = list( stop_if_absent = function(x) { tmp <- vapply(list(self$core_exists, self$collection_exists), function(z) { tmp <- tryCatch(z(x), error = function(e) e) if (inherits(tmp, "error")) FALSE else tmp }, logical(1)) if (!any(tmp)) { stop( x, " doesn't exist - create it first.\n See core_create()/collection_create()", call. 
= FALSE) } }, give_data = function(x, y) { if (x) return(y) else jsonlite::fromJSON(y) }, coll_h = function(args, callopts = list(), raw) { res <- solr_GET(self$make_url(), 'solr/admin/collections', args, callopts, self$proxy) private$give_data(raw, res) }, make_proxy = function(args) { if (is.null(args)) { NULL } else { crul::proxy(url = args$url, user = args$user, pwd = args$pwd, auth = args$auth %||% "basic") } }, lint_errors = function(x) { if (!x %in% c('simple', 'complete')) { stop("errors must be one of 'simple' or 'complete'") } return(x) }, adjust_rows = function(x, optimizeMaxRows, minOptimizedRows, name) { rows <- x$rows %||% NULL rows <- cn(rows) if (!is.null(rows) && optimizeMaxRows) { if (rows > minOptimizedRows || rows < 0) { out <- self$search( name = name, params = list(q = x$q %||% NULL, rows = 0, wt = 'json'), raw = TRUE, optimizeMaxRows = FALSE) oj <- jsonlite::fromJSON(out) if (rows > oj$response$numFound || rows < 0) { rows <- as.double(oj$response$numFound) } } } return(rows) } ) ) solrium/R/core_split.R0000644000176200001440000000772413167507346014461 0ustar liggesusers#' @title Split a core #' #' @description SPLIT splits an index into two or more indexes. The index being #' split can continue to handle requests. The split pieces can be placed into #' a specified directory on the server's filesystem or it can be merged into #' running Solr cores. #' #' @export #' #' @inheritParams core_create #' @param path (character) Two or more target directory paths in which a piece of the #' index will be written #' @param targetCore (character) Two or more target Solr cores to which a piece #' of the index will be merged #' @param ranges (character) A list of number ranges, or hash ranges in hexadecimal format. #' If numbers, they get converted to hexidecimal format before being passed to #' your Solr server. #' @param split.key (character) The key to be used for splitting the index #' @param async (character) Request ID to track this action which will be processed #' asynchronously #' @details The core index will be split into as many pieces as the number of \code{path} #' or \code{targetCore} parameters. #' #' Either \code{path} or \code{targetCore} parameter must be specified but not #' both. The \code{ranges} and \code{split.key} parameters are optional and only one of #' the two should be specified, if at all required. #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: bin/solr start -e schemaless #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # Swap a core #' ## First, create two cores #' # conn$core_split("splitcoretest0") # or create in the CLI: bin/solr create -c splitcoretest0 #' # conn$core_split("splitcoretest1") # or create in the CLI: bin/solr create -c splitcoretest1 #' # conn$core_split("splitcoretest2") # or create in the CLI: bin/solr create -c splitcoretest2 #' #' ## check status #' conn$core_status("splitcoretest0", FALSE) #' conn$core_status("splitcoretest1", FALSE) #' conn$core_status("splitcoretest2", FALSE) #' #' ## split core using targetCore parameter #' conn$core_split("splitcoretest0", targetCore = c("splitcoretest1", "splitcoretest2")) #' #' ## split core using split.key parameter #' ### Here all documents having the same route key as the split.key i.e. 'A!' 
#' ### will be split from the core index and written to the targetCore #' conn$core_split("splitcoretest0", targetCore = "splitcoretest1", split.key = "A!") #' #' ## split core using ranges parameter #' ### Solr expects hash ranges in hexidecimal, but since we're in R, #' ### let's not make our lives any harder, so you can pass in numbers #' ### but you can still pass in hexidecimal if you want. #' rgs <- c('0-1f4', '1f5-3e8') #' conn$core_split("splitcoretest0", targetCore = c("splitcoretest1", "splitcoretest2"), ranges = rgs) #' rgs <- list(c(0, 500), c(501, 1000)) #' conn$core_split("splitcoretest0", targetCore = c("splitcoretest1", "splitcoretest2"), ranges = rgs) #' } core_split <- function(conn, name, path = NULL, targetCore = NULL, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, callopts=list()) { conn$core_split(name, path, targetCore, ranges, async, raw, callopts) } make_args <- function(x) { if (!is.null(x)) { as.list(stats::setNames(x, rep(deparse(substitute(x)), length(x)))) } else { NULL } } do_ranges <- function(x) { if (is.null(x)) { NULL } else { make_hex(x) } } make_hex <- function(x) { if (inherits(x, "list")) { clzz <- sapply(x, class) if (clzz[1] == "character") { paste0(x, collapse = ",") } else { zz <- lapply(x, function(z) { tmp <- try_as_hex(z) paste0(tmp, collapse = "-") }) paste0(zz, collapse = ",") } } else { clzz <- sapply(x, class) if (clzz[1] == "character") { paste0(x, collapse = ",") } else { paste0(try_as_hex(x), collapse = ",") } } } try_as_hex <- function(x) { tryCatch(as.hexmode(x), error = function(e) e) } solrium/R/solr_highlight.r0000644000176200001440000000326413176440326015351 0ustar liggesusers#' @title Highlighting search #' #' @description Returns only highlight items #' #' @export #' @template high #' @param conn A solrium connection object, see [SolrClient] #' @param params (list) a named list of parameters, results in a GET reqeust #' as long as no body parameters given #' @param body (list) a named list of parameters, if given a POST request #' will be performed #' @return XML, JSON, a list, or data.frame #' @seealso [solr_search()], [solr_facet()] #' @references See for #' more information on highlighting. #' @examples \dontrun{ #' # connect #' (conn <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' #' # highlight search #' solr_highlight(conn, params = list(q='alcohol', hl.fl = 'abstract', rows=10), #' parsetype = "list") #' solr_highlight(conn, params = list(q='alcohol', hl.fl = c('abstract','title'), #' rows=3), parsetype = "list") #' #' # Raw data back #' ## json #' solr_highlight(conn, params = list(q='alcohol', hl.fl = 'abstract', rows=10), #' raw=TRUE) #' ## xml #' solr_highlight(conn, params = list(q='alcohol', hl.fl = 'abstract', rows=10, #' wt='xml'), raw=TRUE) #' ## parse after getting data back #' out <- solr_highlight(conn, params = list(q='theoretical math', #' hl.fl = c('abstract','title'), hl.fragsize=30, rows=10, wt='xml'), #' raw=TRUE) #' solr_parse(out, parsetype='list') #' } solr_highlight <- function(conn, name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', ...) { conn$highlight(name = name, params = params, body = body, callopts = callopts, raw = raw, parsetype = parsetype, ...) } solrium/R/collections.R0000644000176200001440000000116213167507346014622 0ustar liggesusers#' List collections or cores #' #' @export #' @inheritParams ping #' @details Calls [collection_list()] or [core_status()] internally, #' and parses out names for you. 
#' @return character vector #' @examples \dontrun{ #' # connect #' (conn <- SolrClient$new()) #' #' # list collections #' conn$collection_list() #' collections(conn) #' #' # list cores #' conn$core_status() #' cores(conn) #' } collections <- function(conn, ...) { check_sr(conn) as.character(conn$collection_list(...)$collections) } #' @export #' @rdname collections cores <- function(conn, ...) { check_sr(conn) names(conn$core_status(...)$status) } solrium/R/collection_createshard.R0000644000176200001440000000157213167507346017011 0ustar liggesusers#' Create a shard #' #' @export #' @inheritParams collection_create #' @param shard (character) Required. The name of the shard to be created. #' @param createNodeSet (character) Allows defining the nodes to spread the new #' collection across. If not provided, the CREATE operation will create #' shard-replica spread across all live Solr nodes. The format is a #' comma-separated list of node_names, such as localhost:8983_solr, #' localhost:8984_s olr, localhost:8985_solr. #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' ## FIXME - doesn't work right now #' # conn$collection_create(name = "trees") #' # conn$collection_createshard(name = "trees", shard = "newshard") #' } collection_createshard <- function(conn, name, shard, createNodeSet = NULL, raw = FALSE, ...) { conn$collection_createshard(name, shard, createNodeSet, raw, ...) } solrium/R/collection_splitshard.R0000644000176200001440000000220413176242775016674 0ustar liggesusers#' Create a shard #' #' @export #' @inheritParams collection_create #' @param shard (character) Required. The name of the shard to be split #' @param ranges (character) A comma-separated list of hash ranges in #' hexadecimal e.g. ranges=0-1f4,1f5-3e8,3e9-5dc #' @param split.key (character) The key to use for splitting the index #' @param async (character) Request ID to track this action which will be #' processed asynchronously #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("trees")) { #' conn$collection_create("trees") #' } #' #' # find shard names #' names(conn$collection_clusterstatus()$cluster$collections$trees$shards) #' #' # split a shard by name #' conn$collection_splitshard(name = "trees", shard = "shard1") #' #' # now we have three shards #' names(conn$collection_clusterstatus()$cluster$collections$trees$shards) #' } collection_splitshard <- function(conn, name, shard, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, callopts = list()) { conn$collection_splitshard(name, shard, ranges, split.key, async, raw, callopts) } solrium/R/check_args_helpers.R0000644000176200001440000000551413176202670016114 0ustar liggesuserscheck_args_search <- function(x, reps, ...) { if (deparse(substitute(x)) == "params") { if (is.null(x$wt)) x$wt <- "json" check_wt(x$wt) } if (!is.null(x$fl)) x$fl <- paste0(x$fl, collapse = ",") # args that can be repeated tmp <- x for (i in reps) tmp[[i]] <- NULL x <- c(tmp, collectargs(z = reps, lst = x)) # additional parameters x <- c(x, list(...)) return(x) } check_args_facet <- function(x, reps, ...) 
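# same wt/fl handling and repeated-argument collection as check_args_search(),
# plus facet-specific behavior: fl is overridden to 'DOES_NOT_EXIST',
# facet=true is set, multiple facet.pivot values are collapsed into one
# comma-separated value, and at least one facet.* field must be supplied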
{ if (deparse(substitute(x)) == "params") { if (is.null(x$wt)) x$wt <- "json" check_wt(x$wt) } if (!is.null(x$fl)) x$fl <- paste0(x$fl, collapse = ",") # args that can be repeated x <- collectargs(reps, x) # additional parameters x <- c(x, list(...)) x$fl <- 'DOES_NOT_EXIST' x$facet <- 'true' if (length(x[names(x) %in% "facet.pivot"]) > 1) { xx <- paste0(unlist(unname(x[names(x) %in% "facet.pivot"])), collapse = ",") x[names(x) %in% "facet.pivot"] <- NULL x$facet.pivot <- xx } # check if any facet.* fields - if none, stop with message if (!any(grepl("facet\\.", names(x)))) { stop("didn't detect any facet. fields - at least 1 required") } return(x) } check_args_stats <- function(x, reps, ...) { if (deparse(substitute(x)) == "params") { if (is.null(x$wt)) x$wt <- "json" check_wt(x$wt) } if (!is.null(x$fl)) x$fl <- paste0(x$fl, collapse = ",") # args that can be repeated x <- collectargs(reps, x) # additional parameters x <- c(x, list(...)) x$stats <- 'true' return(x) } check_args_high <- function(x, reps, ...) { if (deparse(substitute(x)) == "params") { if (is.null(x$wt)) x$wt <- "json" check_wt(x$wt) } if (!is.null(x$fl)) x$fl <- paste0(x$fl, collapse = ",") if (!is.null(x$hl.fl)) names(x$hl.fl) <- rep("hl.fl", length(x$hl.fl)) x <- c(popp(x, "hl.fl"), x$hl.fl) # additional parameters x <- c(x, list(...)) x$hl <- 'true' # check that args are in acceptable set if (!all(names(x) %in% reps)) stop("some keys not in acceptable set for highlight") return(x) } check_args_mlt <- function(x, reps, ...) { if (deparse(substitute(x)) == "params") { if (is.null(x$wt)) x$wt <- "json" check_wt(x$wt) } fl_str <- paste0(x$fl, collapse = ",") if (any(grepl('id', x$fl))) { x$fl <- fl_str } else { x$fl <- sprintf('id,%s', fl_str) } # additional parameters x <- c(x, list(...)) x$mlt <- 'true' # check that args are in acceptable set if (!all(names(x) %in% reps)) stop("some keys not in acceptable set for mlt") return(x) } check_args_group <- function(x, reps, ...) { if (deparse(substitute(x)) == "params") { if (is.null(x$wt)) x$wt <- "json" check_wt(x$wt) } if (!is.null(x$fl)) x$fl <- paste0(x$fl, collapse = ",") # args that can be repeated x <- collectargs(reps, x) # additional parameters x <- c(x, list(...)) x$group <- 'true' return(x) } solrium/R/core_create.R0000644000176200001440000000501713167507346014562 0ustar liggesusers#' Create a core #' #' @export #' #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core to be created. Required #' @param instanceDir (character) Path to instance directory #' @param config (character) Path to config file #' @param schema (character) Path to schema file #' @param dataDir (character) Name of the data directory relative to #' instanceDir. #' @param configSet (character) Name of the configset to use for this core. #' For more information, see #' https://lucene.apache.org/solr/guide/6_6/config-sets.html #' @param collection (character) The name of the collection to which this core #' belongs. The default is the name of the core. collection.= #' causes a property of = to be set if a new collection is being #' created. Use collection.configNa me= to point to the #' configuration for a new collection. #' @param shard (character) The shard id this core represents. Normally you #' want to be auto-assigned a shard id. 
#' @param async (character) Request ID to track this action which will be #' processed asynchronously #' @param raw (logical) If `TRUE`, returns raw data #' @param callopts curl options passed on to [crul::HttpClient] #' @param ... You can pass in parameters like `property.name=value` to set #' core property name to value. See the section Defining core.properties for #' details on supported properties and values. #' (https://lucene.apache.org/solr/guide/6_6/defining-core-properties.html) #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or create as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # Create a core #' path <- "~/solr-7.0.0/server/solr/newcore/conf" #' dir.create(path, recursive = TRUE) #' files <- list.files("~/solr-7.0.0/server/solr/configsets/sample_techproducts_configs/conf/", #' full.names = TRUE) #' invisible(file.copy(files, path, recursive = TRUE)) #' conn$core_create(name = "newcore", instanceDir = "newcore", #' configSet = "sample_techproducts_configs") #' } core_create <- function(conn, name, instanceDir = NULL, config = NULL, schema = NULL, dataDir = NULL, configSet = NULL, collection = NULL, shard = NULL, async=NULL, raw = FALSE, callopts=list(), ...) { conn$core_create(name, instanceDir, config, schema, dataDir, configSet, collection, shard, async, raw, callopts, ...) } solrium/R/core_unload.R0000644000176200001440000000243713167507346014604 0ustar liggesusers#' Unload (delete) a core #' #' @export #' #' @inheritParams core_create #' @param deleteIndex (logical) If `TRUE`, will remove the index when unloading #' the core. Default: `FALSE` #' @param deleteDataDir (logical) If `TRUE`, removes the data directory and all #' sub-directories. Default: `FALSE` #' @param deleteInstanceDir (logical) If `TRUE`, removes everything related to #' the core, including the index directory, configuration files and other #' related files. Default: `FALSE` #' @param async (character) Request ID to track this action which will be #' processed asynchronously #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' #' # connect #' (conn <- SolrClient$new()) #' #' # Create a core #' conn$core_create(name = "books") #' #' # Unload a core #' conn$core_unload(name = "books") #' ## not found #' # conn$core_unload(name = "books") #' # > Error: 400 - Cannot unload non-existent core [books] #' } core_unload <- function(conn, name, deleteIndex = FALSE, deleteDataDir = FALSE, deleteInstanceDir = FALSE, async = NULL, raw = FALSE, callopts = list()) { conn$core_unload(name, deleteIndex, deleteDataDir, deleteInstanceDir, async, raw, callopts) } solrium/R/core_rename.R0000644000176200001440000000241413167507346014564 0ustar liggesusers#' Rename a core #' #' @export #' #' @inheritParams core_create #' @param other (character) The new name of the core. Required. 
#' @param async (character) Request ID to track this action which will be #' processed asynchronously #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # Status of particular cores #' path <- "~/solr-7.0.0/server/solr/testcore/conf" #' dir.create(path, recursive = TRUE) #' files <- list.files( #' "~/solr-7.0.0/server/solr/configsets/sample_techproducts_configs/conf/", #' full.names = TRUE) #' invisible(file.copy(files, path, recursive = TRUE)) #' conn$core_create("testcore") # or create in CLI: bin/solr create -c testcore #' #' # rename #' conn$core_rename("testcore", "newtestcore") #' ## status #' conn$core_status("testcore") # core missing #' conn$core_status("newtestcore", FALSE) # not missing #' #' # cleanup #' conn$core_unload("newtestcore") #' } core_rename <- function(conn, name, other, async = NULL, raw = FALSE, callopts=list()) { conn$core_rename(name, other, async, raw, callopts) } solrium/R/collection_migrate.R0000644000176200001440000000337313176214561016147 0ustar liggesusers#' Migrate documents to another collection #' #' @export #' @inheritParams collection_create #' @param target.collection (character) Required. The name of the target collection #' to which documents will be migrated #' @param split.key (character) Required. The routing key prefix. For example, if #' uniqueKey is a!123, then you would use split.key=a! #' @param forward.timeout (integer) The timeout (seconds), until which write requests #' made to the source collection for the given \code{split.key} will be forwarded to the #' target shard. Default: 60 #' @param async (character) Request ID to track this action which will be processed #' asynchronously #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("migrate_from")) { #' conn$collection_create(name = "migrate_from") #' # OR: bin/solr create -c migrate_from #' } #' #' # create another collection #' if (!conn$collection_exists("migrate_to")) { #' conn$collection_create(name = "migrate_to") #' # OR bin/solr create -c migrate_to #' } #' #' # add some documents #' file <- system.file("examples", "books.csv", package = "solrium") #' x <- read.csv(file, stringsAsFactors = FALSE) #' conn$add(x, "migrate_from") #' #' # migrate some documents from one collection to the other #' ## FIXME - not sure if this is actually working.... #' # conn$collection_migrate("migrate_from", "migrate_to", split.key = "05535") #' } collection_migrate <- function(conn, name, target.collection, split.key, forward.timeout = NULL, async = NULL, raw = FALSE, callopts = list()) { conn$collection_migrate(name, target.collection, split.key, forward.timeout, async, raw = FALSE, callopts) } solrium/R/optimize.R0000644000176200001440000000270613167507346014151 0ustar liggesusers#' Optimize #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) A collection or core name. Required. #' @param max_segments optimizes down to at most this number of segments. #' Default: 1 #' @param wait_searcher block until a new searcher is opened and registered #' as the main query searcher, making the changes visible. 
Default: `TRUE` #' @param soft_commit perform a soft commit - this will refresh the 'view' #' of the index in a more performant manner, but without "on-disk" guarantees. #' Default: `FALSE` #' @param wt (character) One of json (default) or xml. If json, uses #' [jsonlite::fromJSON()] to parse. If xml, uses [xml2::read_xml()] to #' parse #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' \code{wt} param #' @param ... curl options passed on to [crul::HttpClient] #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' solr_optimize(conn, "gettingstarted") #' solr_optimize(conn, "gettingstarted", max_segments = 2) #' solr_optimize(conn, "gettingstarted", wait_searcher = FALSE) #' #' # get xml back #' solr_optimize(conn, "gettingstarted", wt = "xml") #' ## raw xml #' solr_optimize(conn, "gettingstarted", wt = "xml", raw = TRUE) #' } solr_optimize <- function(conn, name, max_segments = 1, wait_searcher = TRUE, soft_commit = FALSE, wt = 'json', raw = FALSE, ...) { conn$optimize(name, max_segments, wait_searcher, soft_commit, wt, raw, ...) } solrium/R/collection_removerole.R0000644000176200001440000000145413167507346016702 0ustar liggesusers#' @title Remove a role from a node #' #' @description Remove an assigned role. This API is used to undo the roles #' assigned using \code{\link{collection_addrole}} #' #' @export #' @param role (character) Required. The name of the role. The only supported #' role as of now is overseer (set as default). #' @param node (character) Required. The name of the node. #' @inheritParams collection_create #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # get list of nodes #' nodes <- conn$collection_clusterstatus()$cluster$live_nodes #' conn$collection_addrole(node = nodes[1]) #' conn$collection_removerole(node = nodes[1]) #' } collection_removerole <- function(conn, role = "overseer", node, raw = FALSE, ...) { conn$collection_removerole(role, node, raw, ...) } solrium/R/update_json.R0000644000176200001440000000305613176206156014616 0ustar liggesusers#' Update documents with JSON data #' #' @export #' @family update #' @template update #' @template commitcontrol #' @param conn A solrium connection object, see [SolrClient] #' @param files Path to a single file to load into Solr #' @examples \dontrun{ #' # start Solr: bin/solr start -f -c -p 8983 #' #' # connect #' (conn <- SolrClient$new()) #' #' # Add documents #' file <- system.file("examples", "books2.json", package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_json(files = file, name = "books") #' update_json(conn, files = file, name = "books") #' #' # Update commands - can include many varying commands #' ## Add file #' file <- system.file("examples", "updatecommands_add.json", #' package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_json(file, "books") #' #' ## Delete file #' file <- system.file("examples", "updatecommands_delete.json", #' package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_json(file, "books") #' #' # Add and delete in the same document #' ## Add a document first, that we can later delete #' ss <- list(list(id = 456, name = "cat")) #' conn$add(ss, "books") #' } update_json <- function(conn, files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = 'json', raw = FALSE, ...) 
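# thin wrapper around the R6 method: validates the connection object, then
# delegates to SolrClient$update_json() with the same arguments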
{ check_sr(conn) conn$update_json(files, name, commit, optimize, max_segments, expunge_deletes, wait_searcher, soft_commit, prepare_commit, wt, raw, ...) } solrium/R/collection_deletereplica.R0000644000176200001440000000426513176214036017317 0ustar liggesusers#' @title Delete a replica #' #' @description Delete a replica from a given collection and shard. If the #' corresponding core is up and running the core is unloaded and the entry is #' removed from the clusterstate. If the node/core is down , the entry is taken #' off the clusterstate and if the core comes up later it is automatically #' unregistered. #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) Required. The name of the collection. #' @param shard (character) Required. The name of the shard that includes the replica to #' be removed. #' @param replica (character) Required. The name of the replica to remove. #' @param onlyIfDown (logical) When `TRUE` will not take any action if the replica #' is active. Default: `FALSE` #' @param raw (logical) If `TRUE`, returns raw data #' @param callopts curl options passed on to [crul::HttpClient] #' @param ... You can pass in parameters like \code{property.name=value} to set #' core property name to value. See the section Defining core.properties for details on #' supported properties and values. #' (https://cwiki.apache.org/confluence/display/solr/Defining+core.properties) #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("foobar2")) { #' conn$collection_create(name = "foobar2", maxShardsPerNode = 2) #' } #' #' # status #' conn$collection_clusterstatus()$cluster$collections$foobar2$shards$shard1 #' #' # add replica #' conn$collection_addreplica(name = "foobar2", shard = "shard1") #' #' # delete replica #' ## get replica name #' nms <- names(conn$collection_clusterstatus()$cluster$collections$foobar2$shards$shard1$replicas) #' conn$collection_deletereplica(name = "foobar2", shard = "shard1", replica = nms[1]) #' #' # status again #' conn$collection_clusterstatus()$cluster$collections$foobar2$shards$shard1 #' } collection_deletereplica <- function(conn, name, shard = NULL, replica = NULL, onlyIfDown = FALSE, raw = FALSE, callopts=list(), ...) { conn$collection_deletereplica(name, shard, replica, onlyIfDown, raw, callopts, ...) } solrium/R/solr_search.r0000644000176200001440000001210513176420152014634 0ustar liggesusers#' @title Solr search #' #' @description Returns only matched documents, and doesn't return other items, #' including facets, groups, mlt, stats, and highlights. #' #' @export #' @template search #' @template optimizerows #' @param conn A solrium connection object, see [SolrClient] #' @param params (list) a named list of parameters, results in a GET reqeust #' as long as no body parameters given #' @param body (list) a named list of parameters, if given a POST request #' will be performed #' #' @return XML, JSON, a list, or data.frame #' @seealso [solr_highlight()], [solr_facet()] #' @references See #' for more information. #' @note SOLR v1.2 was first version to support csv. 
See #' #' @examples \dontrun{ #' # Connect to a local Solr instance #' (cli <- SolrClient$new()) #' cli$search("gettingstarted", params = list(q = "features:notes")) #' #' solr_search(cli, "gettingstarted") #' solr_search(cli, "gettingstarted", params = list(q = "features:notes")) #' solr_search(cli, "gettingstarted", body = list(query = "features:notes")) #' #' (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' cli$search(params = list(q = "*:*")) #' cli$search(params = list(q = "title:golgi", fl = c('id', 'title'))) #' #' cli$search(params = list(q = "*:*", facet = "true")) #' #' #' # search #' solr_search(cli, params = list(q='*:*', rows=2, fl='id')) #' #' # search and return all rows #' solr_search(cli, params = list(q='*:*', rows=-1, fl='id')) #' #' # Search for word ecology in title and cell in the body #' solr_search(cli, params = list(q='title:"ecology" AND body:"cell"', #' fl='title', rows=5)) #' #' # Search for word "cell" and not "body" in the title field #' solr_search(cli, params = list(q='title:"cell" -title:"lines"', fl='title', #' rows=5)) #' #' # Wildcards #' ## Search for word that starts with "cell" in the title field #' solr_search(cli, params = list(q='title:"cell*"', fl='title', rows=5)) #' #' # Proximity searching #' ## Search for words "sports" and "alcohol" within four words of each other #' solr_search(cli, params = list(q='everything:"sports alcohol"~7', #' fl='abstract', rows=3)) #' #' # Range searches #' ## Search for articles with Twitter count between 5 and 10 #' solr_search(cli, params = list(q='*:*', fl=c('alm_twitterCount','id'), #' fq='alm_twitterCount:[5 TO 50]', rows=10)) #' #' # Boosts #' ## Assign higher boost to title matches than to body matches #' ## (compare the two calls) #' solr_search(cli, params = list(q='title:"cell" abstract:"science"', #' fl='title', rows=3)) #' solr_search(cli, params = list(q='title:"cell"^1.5 AND abstract:"science"', #' fl='title', rows=3)) #' #' # FunctionQuery queries #' ## This kind of query allows you to use the actual values of fields to #' ## calculate relevancy scores for returned documents #' #' ## Here, we search on the product of counter_total_all and alm_twitterCount #' ## metrics for articles in PLOS Journals #' solr_search(cli, params = list(q="{!func}product($v1,$v2)", #' v1 = 'sqrt(counter_total_all)', #' v2 = 'log(alm_twitterCount)', rows=5, fl=c('id','title'), #' fq='doc_type:full')) #' #' ## here, search on the product of counter_total_all and alm_twitterCount, #' ## using a new temporary field "_val_" #' solr_search(cli, #' params = list(q='_val_:"product(counter_total_all,alm_twitterCount)"', #' rows=5, fl=c('id','title'), fq='doc_type:full')) #' #' ## papers with most citations #' solr_search(cli, params = list(q='_val_:"max(counter_total_all)"', #' rows=5, fl=c('id','counter_total_all'), fq='doc_type:full')) #' #' ## papers with most tweets #' solr_search(cli, params = list(q='_val_:"max(alm_twitterCount)"', #' rows=5, fl=c('id','alm_twitterCount'), fq='doc_type:full')) #' #' ## many fq values #' solr_search(cli, params = list(q="*:*", fl=c('id','alm_twitterCount'), #' fq=list('doc_type:full','subject:"Social networks"', #' 'alm_twitterCount:[100 TO 10000]'), #' sort='counter_total_month desc')) #' #' ## using wt = csv #' solr_search(cli, params = list(q='*:*', rows=50, fl=c('id','score'), #' fq='doc_type:full', wt="csv")) #' solr_search(cli, params = list(q='*:*', rows=50, fl=c('id','score'), #' fq='doc_type:full')) #' #' # using a proxy #' # cli <- SolrClient$new(host 
= "api.plos.org", path = "search", port = NULL, #' # proxy = list(url = "http://186.249.1.146:80")) #' # solr_search(cli, q='*:*', rows=2, fl='id', callopts=list(verbose=TRUE)) #' #' # Pass on curl options to modify request #' ## verbose #' solr_search(cli, params = list(q='*:*', rows=2, fl='id'), #' callopts = list(verbose=TRUE)) #' } solr_search <- function(conn, name = NULL, params = list(q = '*:*'), body = NULL, callopts = list(), raw = FALSE, parsetype = 'df', concat = ',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) { conn$search(name = name, params = params, body = body, callopts = callopts, raw = raw, parsetype = parsetype, concat = concat, optimizeMaxRows = optimizeMaxRows, minOptimizedRows = minOptimizedRows, ...) } solrium/R/core_swap.R0000644000176200001440000000326713167507346014276 0ustar liggesusers#' @title Swap a core #' #' @description SWAP atomically swaps the names used to access two existing #' Solr cores. This can be used to swap new content into production. The #' prior core remains available and can be swapped back, if necessary. Each #' core will be known by the name of the other, after the swap #' #' @export #' #' @inheritParams core_create #' @param other (character) The name of one of the cores to be swapped. #' Required. #' @param async (character) Request ID to track this action which will be #' processed asynchronously #' @details Do not use \code{core_swap} with a SolrCloud node. It is not #' supported and can result in the core being unusable. We'll try to stop #' you if you try. #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # Swap a core #' ## First, create two cores #' conn$core_create("swapcoretest1") #' # - or create on CLI: bin/solr create -c swapcoretest1 #' conn$core_create("swapcoretest2") #' # - or create on CLI: bin/solr create -c swapcoretest2 #' #' ## check status #' conn$core_status("swapcoretest1", FALSE) #' conn$core_status("swapcoretest2", FALSE) #' #' ## swap core #' conn$core_swap("swapcoretest1", "swapcoretest2") #' #' ## check status again #' conn$core_status("swapcoretest1", FALSE) #' conn$core_status("swapcoretest2", FALSE) #' } core_swap <- function(conn, name, other, async = NULL, raw = FALSE, callopts=list()) { conn$core_swap(name, other, async, raw, callopts) } solrium/R/collection_create.R0000644000176200001440000001224213176213144015751 0ustar liggesusers#' Add a collection #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core to be created. Required #' @param numShards (integer) The number of shards to be created as part of the #' collection. This is a required parameter when using the 'compositeId' router. #' @param maxShardsPerNode (integer) When creating collections, the shards and/or replicas #' are spread across all available (i.e., live) nodes, and two replicas of the same shard #' will never be on the same node. If a node is not live when the CREATE operation is called, #' it will not get any parts of the new collection, which could lead to too many replicas #' being created on a single live node. Defining maxShardsPerNode sets a limit on the number #' of replicas CREATE will spread to each node. If the entire collection can not be fit into #' the live nodes, no collection will be created at all. 
Default: 1 #' @param createNodeSet (logical) Allows defining the nodes to spread the new collection #' across. If not provided, the CREATE operation will create shard-replica spread across all #' live Solr nodes. The format is a comma-separated list of node_names, such as #' localhost:8983_solr, localhost:8984_solr, localhost:8985_solr. Default: `NULL` #' @param collection.configName (character) Defines the name of the configurations (which #' must already be stored in ZooKeeper) to use for this collection. If not provided, Solr #' will default to the collection name as the configuration name. Default: `compositeId` #' @param replicationFactor (integer) The number of replicas to be created for each shard. #' Default: 1 #' @param router.name (character) The router name that will be used. The router defines #' how documents will be distributed among the shards. The value can be either `implicit`, #' which uses an internal default hash, or `compositeId`, which allows defining the specific #' shard to assign documents to. When using the 'implicit' router, the shards parameter is #' required. When using the 'compositeId' router, the numShards parameter is required. #' For more information, see also the section Document Routing. Default: `compositeId` #' @param shards (character) A comma separated list of shard names, e.g., #' shard-x,shard-y,shard-z . This is a required parameter when using the 'implicit' router. #' @param createNodeSet.shuffle (logical) Controls wether or not the shard-replicas created #' for this collection will be assigned to the nodes specified by the createNodeSet in a #' sequential manner, or if the list of nodes should be shuffled prior to creating individual #' replicas. A 'false' value makes the results of a collection creation predictible and #' gives more exact control over the location of the individual shard-replicas, but 'true' #' can be a better choice for ensuring replicas are distributed evenly across nodes. Ignored #' if createNodeSet is not also specified. Default: `TRUE` #' @param router.field (character) If this field is specified, the router will look at the #' value of the field in an input document to compute the hash and identify a shard instead of #' looking at the uniqueKey field. If the field specified is null in the document, the document #' will be rejected. Please note that RealTime Get or retrieval by id would also require the #' parameter _route_ (or shard.keys) to avoid a distributed search. #' @param autoAddReplicas (logical) When set to true, enables auto addition of replicas on #' shared file systems. See the section autoAddReplicas Settings for more details on settings #' and overrides. Default: `FALSE` #' @param async (character) Request ID to track this action which will be processed #' asynchronously #' @param raw (logical) If `TRUE`, returns raw data #' @param callopts curl options passed on to [crul::HttpClient] #' @param ... You can pass in parameters like `property.name=value` to set #' core property name to value. See the section Defining core.properties for #' details on supported properties and values. 
#' (https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html) #' @examples \dontrun{ #' # connect #' (cli <- SolrClient$new()) #' #' if (!cli$collection_exists("helloWorld")) { #' cli$collection_create(name = "helloWorld") #' } #' if (!cli$collection_exists("tablesChairs")) { #' cli$collection_create(name = "tablesChairs") #' } #' } collection_create <- function(conn, name, numShards = 1, maxShardsPerNode = 1, createNodeSet = NULL, collection.configName = NULL, replicationFactor = 1, router.name = NULL, shards = NULL, createNodeSet.shuffle = TRUE, router.field = NULL, autoAddReplicas = FALSE, async = NULL, raw = FALSE, callopts=list(), ...) { conn$collection_create( name, numShards = numShards, maxShardsPerNode = maxShardsPerNode, createNodeSet = createNodeSet, collection.configName = collection.configName, replicationFactor = replicationFactor, router.name = router.name, shards = shards, createNodeSet.shuffle = createNodeSet.shuffle, router.field = router.field, autoAddReplicas = autoAddReplicas, async = async, raw = raw, callopts = callopts, ...) } solrium/R/update_xml.R0000644000176200001440000000345413176206303014441 0ustar liggesusers#' Update documents with XML data #' #' @export #' @family update #' @template update #' @template commitcontrol #' @param conn A solrium connection object, see [SolrClient] #' @param files Path to a single file to load into Solr #' @examples \dontrun{ #' # start Solr: bin/solr start -f -c -p 8983 #' #' # connect #' (conn <- SolrClient$new()) #' #' # create a collection #' if (!conn$collection_exists("books")) { #' conn$collection_create(name = "books", numShards = 2) #' } #' #' # Add documents #' file <- system.file("examples", "books.xml", package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_xml(file, "books") #' #' # Update commands - can include many varying commands #' ## Add files #' file <- system.file("examples", "books2_delete.xml", package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_xml(file, "books") #' #' ## Delete files #' file <- system.file("examples", "updatecommands_delete.xml", #' package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_xml(file, "books") #' #' ## Add and delete in the same document #' ## Add a document first, that we can later delete #' ss <- list(list(id = 456, name = "cat")) #' conn$add(ss, "books") #' ## Now add a new document, and delete the one we just made #' file <- system.file("examples", "add_delete.xml", package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_xml(file, "books") #' } update_xml <- function(conn, files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = 'json', raw = FALSE, ...) { check_sr(conn) conn$update_xml(files, name, commit, optimize, max_segments, expunge_deletes, wait_searcher, soft_commit, prepare_commit, wt, raw, ...) } solrium/R/collection_addreplica.R0000644000176200001440000000430713176210721016577 0ustar liggesusers#' @title Add a replica #' #' @description Add a replica to a shard in a collection. The node name can be #' specified if the replica is to be created in a specific node #' #' @export #' @inheritParams collection_create #' @param shard (character) The name of the shard to which replica is to be added. #' If \code{shard} is not given, then \code{route} must be. 
#' @param route (character) If the exact shard name is not known, users may pass #' the \code{route} value and the system would identify the name of the shard. #' Ignored if the \code{shard} param is also given #' @param node (character) The name of the node where the replica should be created #' @param instanceDir (character) The instanceDir for the core that will be created #' @param dataDir (character) The directory in which the core should be created #' @param async (character) Request ID to track this action which will be processed #' asynchronously #' @param ... You can pass in parameters like \code{property.name=value} to set #' core property name to value. See the section Defining core.properties for details on #' supported properties and values. #' (https://cwiki.apache.org/confluence/display/solr/Defining+core.properties) #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("foobar")) { #' conn$collection_create(name = "foobar", numShards = 2) #' # OR bin/solr create -c foobar #' } #' #' # status #' conn$collection_clusterstatus()$cluster$collections$foobar #' #' # add replica #' if (!conn$collection_exists("foobar")) { #' conn$collection_addreplica(name = "foobar", shard = "shard1") #' } #' #' # status again #' conn$collection_clusterstatus()$cluster$collections$foobar #' conn$collection_clusterstatus()$cluster$collections$foobar$shards #' conn$collection_clusterstatus()$cluster$collections$foobar$shards$shard1 #' } collection_addreplica <- function(conn, name, shard = NULL, route = NULL, node = NULL, instanceDir = NULL, dataDir = NULL, async = NULL, raw = FALSE, callopts=list(), ...) { conn$collection_addreplica(name, shard, route, node, instanceDir, dataDir, async, raw, callopts, ...) } solrium/R/update_atomic_json.R0000644000176200001440000000262013167507346016153 0ustar liggesusers#' Atomic updates with JSON data #' #' Atomic updates to parts of Solr documents #' #' @export #' @param body (character) JSON as a character string #' @inheritParams update_atomic_xml #' @references #' #' @examples \dontrun{ #' # start Solr in Cloud mode: bin/solr start -e cloud -noprompt #' #' # connect #' (conn <- SolrClient$new()) #' #' # create a collection #' if (!conn$collection_exists("books")) { #' conn$collection_delete("books") #' conn$collection_create("books") #' } #' #' # Add documents #' file <- system.file("examples", "books2.json", package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_json(file, "books") #' #' # get a document #' conn$get(ids = 343334534545, "books") #' #' # atomic update #' body <- '[{ #' "id": "343334534545", #' "genre_s": {"set": "mystery" }, #' "pages_i": {"inc": 1 } #' }]' #' conn$update_atomic_json(body, "books") #' #' # get the document again #' conn$get(ids = 343334534545, "books") #' #' # another atomic update #' body <- '[{ #' "id": "343334534545", #' "price": {"remove": "12.5" } #' }]' #' conn$update_atomic_json(body, "books") #' #' # get the document again #' conn$get(ids = 343334534545, "books") #' } update_atomic_json <- function(conn, body, name, wt = 'json', raw = FALSE, ...) { check_sr(conn) conn$update_atomic_json(body, name, wt, raw, ...) 
} solrium/R/collection_reload.R0000644000176200001440000000062113176242530015753 0ustar liggesusers#' Reload a collection #' #' @export #' @inheritParams collection_create #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' if (!conn$collection_exists("helloWorld")) { #' conn$collection_create(name = "helloWorld") #' } #' #' conn$collection_reload(name = "helloWorld") #' } collection_reload <- function(conn, name, raw = FALSE, callopts) { conn$collection_reload(name, raw, callopts) } solrium/R/collection_exists.R0000644000176200001440000000151513167507346016040 0ustar liggesusers#' Check if a collection exists #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core. If not given, all cores. #' @param ... curl options passed on to [crul::HttpClient] #' @details Simply calls [collection_list()] internally #' @return A single boolean, `TRUE` or `FALSE` #' @examples \dontrun{ #' # start Solr with Cloud mode via the schemaless eg: bin/solr -e cloud #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' (conn <- SolrClient$new()) #' #' # exists #' conn$collection_exists("gettingstarted") #' #' # doesn't exist #' conn$collection_exists("hhhhhh") #' } collection_exists <- function(conn, name, ...) { name %in% suppressMessages(conn$collection_list(...))$collections } solrium/R/collection_list.R0000644000176200001440000000045613176214351015466 0ustar liggesusers#' List collections #' #' @export #' @inheritParams ping #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' conn$collection_list() #' conn$collection_list()$collections #' collection_list(conn) #' } collection_list <- function(conn, raw = FALSE, ...) { conn$collection_list(raw = raw, ...) } solrium/R/solr_group.r0000644000176200001440000000767713176437323014555 0ustar liggesusers#' @title Grouped search #' #' @description Returns only group items #' #' @export #' @template group #' @param conn A solrium connection object, see [SolrClient] #' @param params (list) a named list of parameters, results in a GET reqeust #' as long as no body parameters given #' @param body (list) a named list of parameters, if given a POST request #' will be performed #' @return XML, JSON, a list, or data.frame #' @seealso [solr_highlight()], [solr_facet()] #' @references See for more #' information. 
#' @examples \dontrun{ #' # connect #' (cli <- SolrClient$new()) #' #' # by default we do a GET request #' cli$group("gettingstarted", #' params = list(q='*:*', group.field='compName_s')) #' # OR #' solr_group(cli, "gettingstarted", #' params = list(q='*:*', group.field='compName_s')) #' #' # connect #' (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #' #' # Basic group query #' solr_group(cli, params = list(q='ecology', group.field='journal', #' group.limit=3, fl=c('id','score'))) #' solr_group(cli, params = list(q='ecology', group.field='journal', #' group.limit=3, fl='article_type')) #' #' # Different ways to sort (notice diff btw sort of group.sort) #' # note that you can only sort on a field if you return that field #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score'))) #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score','alm_twitterCount'), group.sort='alm_twitterCount desc')) #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score','alm_twitterCount'), sort='score asc', #' group.sort='alm_twitterCount desc')) #' #' # Two group.field values #' out <- solr_group(cli, params = list(q='ecology', group.field=c('journal','article_type'), #' group.limit=3, fl='id'), raw=TRUE) #' solr_parse(out) #' solr_parse(out, 'df') #' #' # Get two groups, one with alm_twitterCount of 0-10, and another group #' # with 10 to infinity #' solr_group(cli, params = list(q='ecology', group.limit=3, fl=c('id','alm_twitterCount'), #' group.query=c('alm_twitterCount:[0 TO 10]','alm_twitterCount:[10 TO *]'))) #' #' # Use of group.format and group.simple. #' ## The raw data structure of these two calls are slightly different, but #' ## the parsing inside the function outputs the same results. You can #' ## of course set raw=TRUE to get back what the data actually look like #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score'), group.format='simple')) #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score'), group.format='grouped')) #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score'), group.format='grouped', group.main='true')) #' #' # xml back #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score'), wt = "xml")) #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score'), wt = "xml"), parsetype = "list") #' res <- solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl=c('id','score'), wt = "xml"), raw = TRUE) #' library("xml2") #' xml2::read_xml(unclass(res)) #' #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl='article_type', wt = "xml")) #' solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, #' fl='article_type', wt = "xml"), parsetype = "list") #' } solr_group <- function(conn, name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', ...) { conn$group(name = name, params = params, body = body, callopts = callopts, raw = raw, parsetype = parsetype, concat = concat, ...) 
} solrium/R/zzz.r0000644000176200001440000001660613176257363013213 0ustar liggesusers#' Function to make make multiple args of the same name from a #' single input with length > 1 #' @param x Value makemultiargs <- function(x){ value <- get(x, envir = parent.frame(n = 2)) if ( length(value) == 0 ) { NULL } else { if ( any(sapply(value, is.na)) ) { NULL } else { if ( !is.character(value) ) { value <- as.character(value) } names(value) <- rep(x, length(value)) value } } } make_multiargs <- function(z, lst) { value <- lst[[z]] if (length(value) == 0) { return(NULL) } else { if (any(sapply(value, is.na))) { return(NULL) } else { if ( !is.character(value) ) { value <- as.character(value) } names(value) <- rep(z, length(value)) value } } } popp <- function(x, nms) { x[!names(x) %in% nms] } # Function to make a list of args passing arg names through multiargs function collectargs <- function(z, lst){ outlist <- list() for (i in seq_along(z)) { outlist[[i]] <- make_multiargs(z[[i]], lst) } as.list(unlist(sc(outlist))) } solr_GET <- function(base, path, args, callopts = NULL, proxy = NULL) { cli <- crul::HttpClient$new(url = base, opts = callopts) if (inherits(proxy, "proxy")) cli$proxies <- proxy res <- cli$get(path = path, query = args) if (res$status_code > 201) { solr_error(res) } else { res$parse("UTF-8") } } solr_error <- function(x) { if (grepl("html", x$response_headers$`content-type`)) { stat <- x$status_http() stop(sprintf('(%s) %s - %s', stat$status_code, stat$message, stat$explanation)) } else { err <- jsonlite::fromJSON(x$parse("UTF-8")) erropt <- Sys.getenv("SOLR_ERRORS") if (erropt == "simple" || erropt == "") { stop(err$error$code, " - ", err$error$msg, call. = FALSE) } else { stop(err$error$code, " - ", err$error$msg, "\nAPI stack trace\n", pluck_trace(err$error$trace), call. = FALSE) } } } pluck_trace <- function(x) { if (is.null(x)) { " - no stack trace" } else { x } } # POST helper fxn solr_POST <- function(base, path, body, args, ctype, proxy, ...) { invisible(match.arg(args$wt, c("xml", "json", "csv"))) args <- lapply(args, function(x) if (is.logical(x)) tolower(x) else x) cli <- crul::HttpClient$new(url = base, headers = ctype, opts = list(...)) if (inherits(proxy, "proxy")) cli$proxies <- proxy tt <- cli$post(path, query = args, body = body) get_response(tt) } # POST helper fxn - just a body solr_POST_body <- function(base, path, body, args, ctype, callopts = list(), proxy) { invisible(match.arg(args$wt, c("xml", "json"))) httpcli <- crul::HttpClient$new(url = base, headers = ctype, opts = callopts) if (inherits(proxy, "proxy")) httpcli$proxies <- proxy res <- httpcli$post(path = path, query = args, body = body, encode = "json") if (res$status_code > 201) solr_error(res) else res$parse("UTF-8") } # POST helper fxn for R objects obj_POST <- function(base, path, body, args, proxy, ...) { invisible(match.arg(args$wt, c("xml", "json", "csv"))) args <- lapply(args, function(x) if (is.logical(x)) tolower(x) else x) body <- jsonlite::toJSON(body, auto_unbox = TRUE) cli <- crul::HttpClient$new( url = base, headers = list(`Content-Type` = "application/json"), opts = list(...) ) if (inherits(proxy, "proxy")) httpcli$proxies <- proxy tt <- cli$post(path, query = args, body = body, encode = "form", ...) 
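  # hand the crul response to get_response(), which raises an R error for any
  # non-2xx HTTP status and otherwise returns the response body as UTF-8 text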
get_response(tt) } # check if core/collection exists, if not stop stop_if_absent <- function(x) { tmp <- vapply(list(core_exists, collection_exists), function(z) { tmp <- tryCatch(z(x), error = function(e) e) if (inherits(tmp, "error")) FALSE else tmp }, logical(1)) if (!any(tmp)) { stop( x, " doesn't exist - create it first.\n See core_create()/collection_create()", call. = FALSE) } } # helper for POSTing from R objects obj_proc <- function(base, path, body, args, raw, proxy, ...) { out <- structure(obj_POST(base, path, body, args, proxy, ...), class = "update", wt = args$wt) if (raw) { out } else { solr_parse(out) } } get_response <- function(x) { if (x$status_code > 201) { err <- jsonlite::fromJSON(x$parse("UTF-8"))$error stop(sprintf("%s: %s", err$code, err$msg), call. = FALSE) } else { x$parse("UTF-8") } } # small function to replace elements of length 0 with NULL replacelen0 <- function(x) { if (length(x) < 1) { NULL } else { x } } sc <- function(l) Filter(Negate(is.null), l) asl <- function(z) { if (is.null(z)) { NULL } else { if (is.logical(z) || tolower(z) == "true" || tolower(z) == "false") { if (z) { return('true') } else { return('false') } } else { return(z) } } } docreate <- function(base, path, files, args, ctype, raw, proxy, ...) { out <- structure(solr_POST(base, path, files, args, ctype, proxy, ...), class = "update", wt = args$wt) if (raw) return(out) solr_parse(out) } doatomiccreate <- function(base, path, body, args, content, raw, proxy, ...) { ctype <- get_ctype(content) out <- structure(solr_POST_body(base, path, body, args, ctype, list(...), proxy), class = "update", wt = args$wt) if (raw) return(out) solr_parse(out) } objcreate <- function(base, path, dat, args, raw, ...) { out <- structure(solr_POST(base, path, dat, args, "json", ...), class = "update", wt = args$wt) if (raw) return(out) solr_parse(out) } check_wt <- function(x) { if (!is.null(x)) { if (!x %in% c('json', 'xml', 'csv')) { stop("wt must be one of: json, xml, csv", call. = FALSE) } } } check_defunct <- function(...) { calls <- names(sapply(match.call(), deparse))[-1] calls_vec <- "verbose" %in% calls if (any(calls_vec)) { stop("The parameter verbose has been removed - see ?SolrClient", call. = FALSE) } } is_in_cloud_mode <- function(x) { xx <- crul::HttpClient$new(url = x$make_url()) res <- xx$get("solr/admin/collections", query = list(action = 'LIST', wt = 'json')) if (res$status_code > 201) return(FALSE) msg <- jsonlite::fromJSON(res$parse("UTF-8"))$error$msg if (is.null(msg)) return(TRUE) !grepl("not running", msg) } is_not_in_cloud_mode <- function(x) !is_in_cloud_mode(x) json_parse <- function(x, raw) { if (raw) { x } else { jsonlite::fromJSON(x) } } unbox_if <- function(x, recursive = FALSE) { if (!is.null(x)) { if (recursive) { rapply(x, jsonlite::unbox, how = "list") } else { lapply(x, jsonlite::unbox) } } else { NULL } } `%||%` <- function(x, y) if (suppressWarnings(is.na(x)) || is.null(x)) y else x url_handle <- function(name) { if (is.null(name)) { "" } else { file.path("solr", name, "select") } } check_sr <- function(x) { if (!inherits(x, "SolrClient")) { stop("conn must be a SolrClient object, see ?SolrClient") } } cn <- function(x) { name <- substitute(x) if (!is.null(x)) { tryx <- tryCatch(as.numeric(as.character(x)), warning = function(e) e) if ("warning" %in% class(tryx)) { stop(name, " should be a numeric or integer class value", call. = FALSE) } if (!inherits(tryx, "numeric") | is.na(tryx)) stop(name, " should be a numeric or integer class value", call. 
= FALSE) return( format(x, digits = 22, scientific = FALSE) ) } else { NULL } } solrium/R/collection_deletealias.R0000644000176200001440000000153113176213371016763 0ustar liggesusers#' Delete a collection alias #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param alias (character) Required. The alias name to be created #' @param raw (logical) If `TRUE`, returns raw data #' @param callopts curl options passed on to [crul::HttpClient] #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' if (!conn$collection_exists("thingsstuff")) { #' conn$collection_create(name = "thingsstuff") #' } #' #' conn$collection_createalias("tstuff", "thingsstuff") #' conn$collection_clusterstatus()$cluster$collections$thingsstuff$aliases # new alias #' conn$collection_deletealias("tstuff") #' conn$collection_clusterstatus()$cluster$collections$thingsstuff$aliases # gone #' } collection_deletealias <- function(conn, alias, raw = FALSE, callopts = list()) { conn$collection_deletealias(alias, raw, callopts) } solrium/R/config_set.R0000644000176200001440000000260613167507346014430 0ustar liggesusers#' Set Solr configuration details #' #' @export #' #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core. If not given, all cores. #' @param set (list) List of key:value pairs of what to set. Default: NULL #' (nothing passed) #' @param unset (list) One or more character strings of keys to unset. Default: NULL #' (nothing passed) #' @param ... curl options passed on to [crul::HttpClient] #' @return A list with response from server #' @examples \dontrun{ #' # start Solr with Cloud mode via the schemaless eg: bin/solr -e cloud #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # set a property #' conn$config_set("gettingstarted", #' set = list(query.filterCache.autowarmCount = 1000)) #' #' # unset a property #' conn$config_set("gettingstarted", unset = "query.filterCache.size", #' verbose = TRUE) #' #' # both set a property and unset a property #' conn$config_set("gettingstarted", unset = "enableLazyFieldLoading") #' #' # many properties #' conn$config_set("gettingstarted", set = list( #' query.filterCache.autowarmCount = 1000, #' query.commitWithin.softCommit = 'false' #' ) #' ) #' } config_set <- function(conn, name, set = NULL, unset = NULL, ...) { conn$config_set(name, set, unset, ...) } solrium/R/collection_rebalanceleaders.R0000644000176200001440000000322313176240611017760 0ustar liggesusers#' @title Rebalance leaders #' #' @description Reassign leaders in a collection according to the preferredLeader #' property across active nodes #' #' @export #' @inheritParams collection_create #' @param maxAtOnce (integer) The maximum number of reassignments to have queue #' up at once. Values <=0 are use the default value Integer.MAX_VALUE. When #' this number is reached, the process waits for one or more leaders to be #' successfully assigned before adding more to the queue. #' @param maxWaitSeconds (integer) Timeout value when waiting for leaders to #' be reassigned. NOTE: if maxAtOnce is less than the number of reassignments #' that will take place, this is the maximum interval that any single wait for #' at least one reassignment. For example, if 10 reassignments are to take #' place and maxAtOnce is 1 and maxWaitSeconds is 60, the upper bound on the #' time that the command may wait is 10 minutes. 
Default: 60 #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("mycollection2")) { #' conn$collection_create(name = "mycollection2") #' # OR: bin/solr create -c mycollection2 #' } #' #' # balance preferredLeader property #' conn$collection_balanceshardunique("mycollection2", property = "preferredLeader") #' #' # balance preferredLeader property #' conn$collection_rebalanceleaders("mycollection2") #' #' # examine cluster status #' conn$collection_clusterstatus()$cluster$collections$mycollection2 #' } collection_rebalanceleaders <- function(conn, name, maxAtOnce = NULL, maxWaitSeconds = NULL, raw = FALSE, ...) { conn$collection_rebalanceleaders(name, maxAtOnce, maxWaitSeconds, raw, ...) } solrium/R/schema.R0000644000176200001440000000327213167507346013550 0ustar liggesusers#' Get the schema for a collection or core #' #' @export #' @param what (character) What to retrieve. By default, we retrieve the entire #' schema. Options include: fields, dynamicfields, fieldtypes, copyfields, name, #' version, uniquekey, similarity, "solrqueryparser/defaultoperator" #' @inheritParams ping #' @examples \dontrun{ #' # start Solr, in your CLI, run: `bin/solr start -e cloud -noprompt` #' # after that, if you haven't run `bin/post -c gettingstarted docs/` yet, do so #' #' # connect: by default we connect to localhost, port 8983 #' (cli <- SolrClient$new()) #' #' # get the schema for the gettingstarted index #' schema(cli, name = "gettingstarted") #' #' # Get parts of the schema #' schema(cli, name = "gettingstarted", "fields") #' schema(cli, name = "gettingstarted", "dynamicfields") #' schema(cli, name = "gettingstarted", "fieldtypes") #' schema(cli, name = "gettingstarted", "copyfields") #' schema(cli, name = "gettingstarted", "name") #' schema(cli, name = "gettingstarted", "version") #' schema(cli, name = "gettingstarted", "uniquekey") #' schema(cli, name = "gettingstarted", "similarity") #' schema(cli, name = "gettingstarted", "solrqueryparser/defaultoperator") #' #' # get raw data #' schema(cli, name = "gettingstarted", "similarity", raw = TRUE) #' schema(cli, name = "gettingstarted", "uniquekey", raw = TRUE) #' #' # start Solr in Schemaless mode: bin/solr start -e schemaless #' # schema(cli, "gettingstarted") #' #' # start Solr in Standalone mode: bin/solr start #' # then add a core: bin/solr create -c helloWorld #' # schema(cli, "helloWorld") #' } schema <- function(conn, name, what = '', raw = FALSE, ...) { conn$schema(name = name, what = what, raw = raw, ...) } solrium/R/solr_get.R0000644000176200001440000000305513167507346014125 0ustar liggesusers#' @title Real time get #' #' @description Get documents by id #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param ids Document IDs, one or more in a vector or list #' @param name (character) A collection or core name. Required. #' @param fl Fields to return, can be a character vector like #' `c('id', 'title')`, or a single character vector with one or more #' comma separated names, like `'id,title'` #' @param wt (character) One of json (default) or xml. Data type returned. #' If json, uses [jsonlite::fromJSON()] to parse. If xml, uses #' [xml2::read_xml()] to parse. #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' `wt` param #' @param ... curl options passed on to [crul::HttpClient] #' @details We use json internally as data interchange format for this function. 
#' @examples \dontrun{ #' (cli <- SolrClient$new()) #' #' # add some documents first #' ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) #' add(cli, ss, name = "gettingstarted") #' #' # Now, get documents by id #' solr_get(cli, ids = 1, "gettingstarted") #' solr_get(cli, ids = 2, "gettingstarted") #' solr_get(cli, ids = c(1, 2), "gettingstarted") #' solr_get(cli, ids = "1,2", "gettingstarted") #' #' # Get raw JSON #' solr_get(cli, ids = 1, "gettingstarted", raw = TRUE, wt = "json") #' solr_get(cli, ids = 1, "gettingstarted", raw = TRUE, wt = "xml") #' } solr_get <- function(conn, ids, name, fl = NULL, wt = 'json', raw = FALSE, ...) { check_sr(conn) conn$get(ids = ids, name = name, fl = fl, wt = wt, raw = raw, ...) } solrium/R/update_atomic_xml.R0000644000176200001440000000373413167507346016011 0ustar liggesusers#' Atomic updates with XML data #' #' Atomic updates to parts of Solr documents #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param body (character) XML as a character string #' @param name (character) Name of the core or collection #' @param wt (character) One of json (default) or xml. If json, uses #' [jsonlite::fromJSON()] to parse. If xml, uses [xml2::read_xml()] to parse #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' `wt` param #' @param ... curl options passed on to [crul::HttpClient] #' @references #' #' @examples \dontrun{ #' # start Solr in Cloud mode: bin/solr start -e cloud -noprompt #' #' # connect #' (conn <- SolrClient$new()) #' #' # create a collection #' if (!conn$collection_exists("books")) { #' conn$collection_delete("books") #' conn$collection_create("books") #' } #' #' # Add documents #' file <- system.file("examples", "books.xml", package = "solrium") #' cat(readLines(file), sep = "\n") #' conn$update_xml(file, "books") #' #' # get a document #' conn$get(ids = '978-0641723445', "books", wt = "xml") #' #' # atomic update #' body <- ' #' #' #' 978-0641723445 #' mystery #' 1 #' #' ' #' conn$update_atomic_xml(body, name="books") #' #' # get the document again #' conn$get(ids = '978-0641723445', "books", wt = "xml") #' #' # another atomic update #' body <- ' #' #' #' 978-0641723445 #' 12.5 #' #' ' #' conn$update_atomic_xml(body, "books") #' #' # get the document again #' conn$get(ids = '978-0641723445', "books", wt = "xml") #' } update_atomic_xml <- function(conn, body, name, wt = 'json', raw = FALSE, ...) { check_sr(conn) conn$update_atomic_xml(body, name, wt, raw, ...) } solrium/R/collection_deleteshard.R0000644000176200001440000000251013176214305016767 0ustar liggesusers#' @title Delete a shard #' #' @description Deleting a shard will unload all replicas of the shard and remove #' them from clusterstate.json. It will only remove shards that are inactive, or #' which have no range given for custom sharding. #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) Required. The name of the collection that includes the shard #' to be deleted #' @param shard (character) Required. The name of the shard to be deleted #' @param raw (logical) If `TRUE`, returns raw data #' @param ... 
curl options passed on to [crul::HttpClient] #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("buffalo")) { #' conn$collection_create(name = "buffalo") #' # OR: bin/solr create -c buffalo #' } #' #' # find shard names #' names(conn$collection_clusterstatus()$cluster$collections$buffalo$shards) #' #' # split a shard by name #' collection_splitshard(conn, name = "buffalo", shard = "shard1") #' #' # now we have three shards #' names(conn$collection_clusterstatus()$cluster$collections$buffalo$shards) #' #' # delete shard #' conn$collection_deleteshard(name = "buffalo", shard = "shard1_1") #' } collection_deleteshard <- function(conn, name, shard, raw = FALSE, ...) { conn$collection_deleteshard(name, shard, raw, ...) } solrium/R/collection_clusterprop.R0000644000176200001440000000257213176211255017076 0ustar liggesusers#' @title Add, edit, delete a cluster-wide property #' #' @description Important: whether add, edit, or delete is used is determined #' by the value passed to the \code{val} parameter. If the property name is #' new, it will be added. If the property name exists, and the value is #' different, it will be edited. If the property name exists, and the value #' is `NULL` or empty the property is deleted (unset). #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) Name of the core or collection #' @param val (character) Required. The value of the property. If the value is #' empty or null, the property is unset. #' @param raw (logical) If \code{TRUE}, returns raw data in format specified by #' \code{wt} param #' @param callopts curl options passed on to [crul::HttpClient] #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # add the value https to the property urlScheme #' collection_clusterprop(conn, name = "urlScheme", val = "https") #' #' # status again #' collection_clusterstatus(conn)$cluster$properties #' #' # delete the property urlScheme by setting val to NULL or a 0 length string #' collection_clusterprop(conn, name = "urlScheme", val = "") #' } collection_clusterprop <- function(conn, name, val, raw = FALSE, callopts=list()) { conn$collection_clusterprop(name, val, raw, callopts) } solrium/R/collection_clusterstatus.R0000644000176200001440000000165613167507346017454 0ustar liggesusers#' @title Get cluster status #' #' @description Fetch the cluster status including collections, shards, #' replicas, configuration name as well as collection aliases and cluster #' properties. #' #' @export #' @inheritParams collection_create #' @param shard (character) The shard(s) for which information is requested. #' Multiple shard names can be specified as a character vector. #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' conn$collection_clusterstatus() #' res <- conn$collection_clusterstatus() #' res$responseHeader #' res$cluster #' res$cluster$collections #' res$cluster$collections$gettingstarted #' res$cluster$live_nodes #' } collection_clusterstatus <- function(conn, name = NULL, shard = NULL, raw = FALSE, ...) { conn$collection_clusterstatus(name, shard, raw, ...) } check_shard <- function(x) { if (is.null(x)) { x } else { paste0(x, collapse = ",") } } solrium/R/config_overlay.R0000644000176200001440000000161013167507346015310 0ustar liggesusers#' Get Solr configuration overlay #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) The name of the core. If not given, all cores. 
#' @param omitHeader (logical) If `TRUE`, omit header. Default: `FALSE` #' @param ... curl options passed on to [crul::HttpClient] #' @return A list with response from server #' @examples \dontrun{ #' # start Solr with Cloud mode via the schemaless eg: bin/solr -e cloud #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # get config overlay #' conn$config_overlay("gettingstarted") #' #' # without header #' conn$config_overlay("gettingstarted", omitHeader = TRUE) #' } config_overlay <- function(conn, name, omitHeader = FALSE, ...) { conn$config_overlay(name, omitHeader, ...) } solrium/R/parsers.R0000644000176200001440000005470113176165145013767 0ustar liggesusers#' Parse raw data from solr_search, solr_facet, or solr_highlight. #' #' @param input Output from solr_facet #' @param parsetype One of 'list' or 'df' (data.frame) #' @param concat Character to conactenate strings by, e.g,. ',' (character). #' Used in solr_parse.sr_search only. #' @details This is the parser used internally in solr_facet, but if you #' output raw data from solr_facet using raw=TRUE, then you can use this #' function to parse that data (a sr_facet S3 object) after the fact to a #' list of data.frame's for easier consumption. The data format type is #' detected from the attribute "wt" on the sr_facet object. #' @export solr_parse <- function(input, parsetype = NULL, concat) { UseMethod("solr_parse") } #' @export solr_parse.default <- function(input, parsetype=NULL, concat=',') { stop("no 'solr_parse' method for ", class(input), call. = FALSE) } #' @export solr_parse.ping <- function(input, parsetype=NULL, concat=',') { wt <- attributes(input)$wt parse_it(input, wt) } #' @export solr_parse.update <- function(input, parsetype=NULL, concat=',') { wt <- attributes(input)$wt switch(wt, xml = xml2::read_xml(unclass(input)), json = jsonlite::fromJSON(input, simplifyDataFrame = FALSE, simplifyMatrix = FALSE), csv = jsonlite::fromJSON(input, simplifyDataFrame = FALSE, simplifyMatrix = FALSE) ) } #' @export solr_parse.sr_facet <- function(input, parsetype = NULL, concat = ',') { if (inherits(unclass(input), "character")) { input <- parse_ch(input, parsetype, concat) } wt <- attributes(input)$wt # Facet queries if (wt == 'json') { fqdat <- input$facet_counts$facet_queries if (length(fqdat) == 0) { fqout <- NULL } else { fqout <- data_frame( term = names(fqdat), value = do.call(c, fqdat) ) } row.names(fqout) <- NULL } else { nodes <- xml2::xml_find_all(input, '//lst[@name="facet_queries"]//int') if (length(nodes) == 0) { fqout <- NULL } else { fqout <- data_frame( term = xml2::xml_attr(nodes, "name"), value = xml2::xml_text(nodes) ) } } # facet fields if (wt == 'json') { ffout <- lapply(input$facet_counts$facet_fields, function(x) { stats::setNames(as_data_frame(do.call(rbind, lapply(seq(1, length(x), by = 2), function(y) { x[c(y, y + 1)] }))), c('term', 'value')) }) } else { nodes <- xml_find_all(input, '//lst[@name="facet_fields"]//lst') ffout <- lapply(nodes, function(z) { ch <- xml_children(z) data_frame(term = vapply(ch, xml_attr, "", attr = "name"), value = vapply(ch, xml_text, "")) }) names(ffout) <- xml_attr(nodes, "name") } # facet pivot if (wt == 'json') { fpout <- NULL pivot_input <- jsonlite::fromJSON(jsonlite::toJSON(input))$facet_count$facet_pivot[[1]] if (length(pivot_input) != 0) { fpout <- list() pivots_left <- ('pivot' %in% names(pivot_input)) if (pivots_left) { infinite_loop_check <- 1 
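        # flatten one level of nesting per pass: pivot_flatten_tabular() pulls the child
        # 'pivot' data.frames up alongside their parent rows; keep looping until no
        # 'pivot' column remains (the counter guards against runaway nesting)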
while (pivots_left & infinite_loop_check < 100) { stopifnot(is.data.frame(pivot_input)) flattened_result <- pivot_flatten_tabular(pivot_input) fpout <- c(fpout, list(flattened_result$parent)) pivot_input <- flattened_result$flattened_pivot pivots_left <- ('pivot' %in% names(pivot_input)) infinite_loop_check <- infinite_loop_check + 1 } fpout <- c(fpout, list(flattened_result$flattened_pivot)) } else { fpout <- c(fpout, list(pivot_input)) } fpout <- lapply(fpout, collapse_pivot_names) names(fpout) <- sapply(fpout, FUN = function(x) { paste(head(names(x), -1), collapse = ",") }) } } else { message('facet.pivot results are not supported with XML response types, use wt="json"') fpout <- NULL } # Facet dates if (wt == 'json') { datesout <- NULL if (length(input$facet_counts$facet_dates) != 0) { datesout <- lapply(input$facet_counts$facet_dates, function(x) { x <- x[!names(x) %in% c('gap','start','end')] data_frame(date = names(x), value = do.call(c, x)) }) } } else { nodes <- xml_find_all(input, '//lst[@name="facet_dates"]')[[1]] if (length(nodes) != 0) { datesout <- stats::setNames(lapply(xml_children(nodes), function(z) { z <- xml_find_all(z, 'int') data_frame( date = xml2::xml_attr(z, "name"), value = xml2::xml_text(z) ) }), xml_attr(xml_children(nodes), "name")) } } # Facet ranges rangesout <- NULL if (wt == 'json') { if (length(input$facet_counts$facet_ranges) != 0) { rangesout <- lapply(input$facet_counts$facet_ranges, function(x){ x <- x[!names(x) %in% c('gap','start','end')]$counts stats::setNames(as_data_frame(do.call(rbind, lapply(seq(1, length(x), by = 2), function(y){ x[c(y, y + 1)] }))), c('term', 'value')) }) } } else { nodes <- xml_find_all(input, '//lst[@name="facet_ranges"]//lst[not(@name="counts")]') if (length(nodes) != 0) { rangesout <- stats::setNames(lapply(nodes, function(z) { z <- xml_children(xml_find_first(z, 'lst[@name="counts"]')) data_frame( term = xml2::xml_attr(z, "name"), value = xml2::xml_text(z) ) }), xml_attr(nodes, "name")) } } # output res <- list(facet_queries = replacelen0(fqout), facet_fields = replacelen0(ffout), facet_pivot = replacelen0(fpout), facet_dates = replacelen0(datesout), facet_ranges = replacelen0(rangesout)) res <- if (length(sc(res)) == 0) NULL else res return( res ) } #' @export #' @rdname solr_parse solr_parse.sr_high <- function(input, parsetype='list', concat=',') { if (inherits(unclass(input), "character")) input <- parse_ch(input, parsetype, concat) wt <- attributes(input)$wt if (wt == 'json') { if (parsetype == 'df') { dat <- input$highlight df <- dplyr::bind_rows(lapply(dat, function(z) { dplyr::as_data_frame(lapply(z, function(w) { if (length(w) > 1) paste0(w, collapse = "") else w })) })) if (NROW(df) == 0) { highout <- tibble::data_frame() } else { highout <- tibble::add_column(df, names = names(dat), .before = TRUE) } } else { highout <- input$highlight } } else { highout <- xml_children(xml_find_all(input, '//lst[@name="highlighting"]')) tmptmp <- lapply(highout, function(z) { c( names = xml_attr(z, "name"), sapply( xml_children(z), function(w) as.list(stats::setNames(xml_text(w), xml_attr(w, "name")))) ) }) if (parsetype == 'df') { highout <- dplyr::bind_rows(lapply(tmptmp, dplyr::as_data_frame)) } else { highout <- tmptmp } } return( highout ) } #' @export #' @rdname solr_parse solr_parse.sr_search <- function(input, parsetype = 'list', concat = ',') { if (inherits(unclass(input), "character")) input <- parse_ch(input, parsetype, concat) wt <- attributes(input)$wt if (wt == 'json') { if (parsetype == 'df') { dat <- 
input$response$docs dat2 <- lapply(dat, function(x) { lapply(x, function(y) { tmp <- if (length(y) > 1) { paste(y, collapse = concat) } else { y } if (inherits(y, "list")) unlist(tmp) else tmp }) }) datout <- dplyr::bind_rows(lapply(dat2, as_data_frame)) } else { datout <- input$response$docs } datout <- add_atts(datout, popp(input$response, "docs")) } else if (wt == "xml") { temp <- xml2::xml_find_all(input, '//doc') tmptmp <- lapply(temp, function(x) { sapply(xml2::xml_children(x), nmtxt) }) if (parsetype == 'df') { datout <- dplyr::bind_rows(lapply(tmptmp, as_data_frame)) } else { datout <- tmptmp } datout <- add_atts(datout, as.list(xml2::xml_attrs(xml2::xml_find_first(input, "result")))) } else { datout <- input } return( datout ) } #' @export #' @rdname solr_parse solr_parse.sr_all <- function(input, parsetype = 'list', concat = ',') { list( search = solr_parse.sr_search(unclass(input), parsetype, concat), facet = solr_parse.sr_facet(unclass(input), parsetype, concat), high = solr_parse.sr_high(unclass(input), parsetype, concat), mlt = solr_parse.sr_mlt(unclass(input), parsetype, concat), group = solr_parse.sr_group(unclass(input), parsetype, concat), stats = solr_parse.sr_stats(unclass(input), parsetype, concat) ) } #' @export #' @rdname solr_parse solr_parse.sr_mlt <- function(input, parsetype = 'list', concat = ',') { if (inherits(unclass(input), "character")) input <- parse_ch(input, parsetype, concat) wt <- attributes(input)$wt if (wt == 'json') { if (parsetype == 'df') { res <- input$response reslist <- lapply(res$docs, function(y) { lapply(y, function(z) { if (length(z) > 1) { paste(z, collapse = concat) } else { z } }) }) resdat <- dplyr::bind_rows(lapply(reslist, as_data_frame)) dat <- input$moreLikeThis dat2 <- lapply(dat, function(x){ lapply(x$docs, function(y){ lapply(y, function(z){ if (length(z) > 1) { paste(z, collapse = concat) } else { z } }) }) }) datmlt <- list() for (i in seq_along(dat)) { attsdf <- as_data_frame(popp(dat[[i]], "docs")) df <- dplyr::bind_rows(lapply(dat[[i]]$docs, function(y) { as_data_frame(lapply(y, function(z) { if (length(z) > 1) { paste(z, collapse = concat) } else { z } })) })) if (NROW(df) == 0) { df <- attsdf } else { df <- as_tibble(cbind(attsdf, df)) } datmlt[[names(dat[i])]] <- df } datout <- list(docs = resdat, mlt = datmlt) } else { datout <- input$moreLikeThis } } else { res <- xml_find_all(input, '//result[@name="response"]//doc') resdat <- dplyr::bind_rows(lapply(res, function(x){ tmp <- sapply(xml_children(x), nmtxt) as_data_frame(tmp) })) temp <- xml_find_all(input, '//lst[@name="moreLikeThis"]') tmptmp <- stats::setNames(lapply(xml_children(temp), function(z) { lapply(xml_find_all(z, "doc"), function(w) { sapply(xml_children(w), nmtxt) }) }), xml_attr(xml_children(temp), "name")) tmptmp <- Map(function(x, y) { atts <- as.list(xml_attrs(y)) for (i in seq_along(atts)) { attr(x, names(atts)[i]) <- atts[[i]] } x }, tmptmp, xml_children(temp) ) if (parsetype == 'df') { datmlt <- lapply(tmptmp, function(z) { df <- dplyr::bind_rows(lapply(z, as_data_frame)) atts <- attributes(z) attsdf <- as_data_frame(atts) if (NROW(df) == 0) { attsdf } else { as_tibble(cbind(attsdf, df)) } }) datout <- list(docs = resdat, mlt = datmlt) } else { datout <- list(docs = resdat, mlt = tmptmp) } } return( datout ) } #' @export #' @rdname solr_parse solr_parse.sr_stats <- function(input, parsetype = 'list', concat = ',') { if (inherits(unclass(input), "character")) input <- parse_ch(input, parsetype, concat) wt <- attributes(input)$wt if (wt == 'json') { if 
(parsetype == 'df') { dat <- input$stats$stats_fields dat2 <- lapply(dat, function(x){ data.frame(x[!names(x) %in% 'facets']) }) dat_reg <- do.call(rbind, dat2) # parse the facets if (length(dat[[1]]$facets) == 0) { dat_facet <- NULL } else { dat_facet <- lapply(dat, function(x){ facetted <- x[names(x) %in% 'facets'][[1]] if (length(facetted) == 1) { df <- dplyr::bind_rows( lapply(facetted[[1]], function(z) { as_data_frame( lapply(z[!names(z) %in% 'facets'], function(w) { if (length(w) == 0) "" else w }) ) }) , .id = names(facetted)) } else { df <- stats::setNames(lapply(seq.int(length(facetted)), function(n) { dplyr::bind_rows(lapply(facetted[[n]], function(b) { as_data_frame( lapply(b[!names(b) %in% 'facets'], function(w) { if (length(w) == 0) "" else w }) ) }), .id = names(facetted)[n]) }), names(facetted)) } return(df) }) } datout <- list(data = dat_reg, facet = dat_facet) } else { dat <- input$stats$stats_fields # w/o facets dat_reg <- lapply(dat, function(x){ x[!names(x) %in% 'facets'] }) # just facets dat_facet <- lapply(dat, function(x){ facetted <- x[names(x) %in% 'facets'][[1]] if (length(facetted) == 1) { lapply(facetted[[1]], function(z) z[!names(z) %in% 'facets']) } else { df <- lapply(facetted, function(z){ lapply(z, function(zz) zz[!names(zz) %in% 'facets']) }) } }) datout <- list(data = dat_reg, facet = dat_facet) } } else { temp <- xml_find_all(input, '//lst/lst[@name="stats_fields"]/lst') if (parsetype == 'df') { # w/o facets dat_reg <- dplyr::bind_rows(stats::setNames(lapply(temp, function(h){ as_data_frame(popp(sapply(xml_children(h), nmtxt), "facets")) }), xml_attr(temp, "name")), .id = "stat") # just facets dat_facet <- stats::setNames(lapply(temp, function(e){ tt <- xml_find_first(e, 'lst[@name="facets"]') stats::setNames(lapply(xml_children(tt), function(f){ dplyr::bind_rows(stats::setNames(lapply(xml_children(f), function(g){ as_data_frame(popp(sapply(xml_children(g), nmtxt), "facets")) }), xml_attr(xml_children(f), "name")), .id = xml_attr(f, "name")) }), xml_attr(xml_children(tt), "name")) }), xml_attr(temp, "name")) datout <- list(data = dat_reg, facet = dat_facet) } else { # w/o facets dat_reg <- stats::setNames(lapply(temp, function(h){ popp(sapply(xml_children(h), nmtxt), "facets") }), xml_attr(temp, "name")) # just facets dat_facet <- stats::setNames(lapply(temp, function(e){ tt <- xml_find_first(e, 'lst[@name="facets"]') stats::setNames(lapply(xml_children(tt), function(f){ stats::setNames(lapply(xml_children(f), function(g){ popp(sapply(xml_children(g), nmtxt), "facets") }), xml_attr(xml_children(f), "name")) }), xml_attr(xml_children(tt), "name")) }), xml_attr(temp, "name")) datout <- list(data = dat_reg, facet = dat_facet) } } datout <- if (length(Filter(length, datout)) == 0) NULL else datout return( datout ) } #' @export #' @rdname solr_parse solr_parse.sr_group <- function(input, parsetype = 'list', concat = ',') { if (inherits(unclass(input), "character")) input <- parse_ch(input, parsetype, concat) wt <- attributes(input)$wt if (wt == 'json') { if (parsetype == 'df') { if ('response' %in% names(input)) { datout <- cbind(data.frame( numFound = input[[1]]$numFound, start = input[[1]]$start), do.call(rbind.fill, lapply(input[[1]]$docs, data.frame, stringsAsFactors = FALSE)) ) } else { dat <- input$grouped if (length(dat) == 1) { if ('groups' %in% names(dat[[1]])) { datout <- dat[[1]]$groups datout <- do.call(rbind.fill, lapply(datout, function(x){ df <- data.frame(groupValue = ifelse(is.null(x$groupValue),"none",x$groupValue), numFound = 
x$doclist$numFound, start = x$doclist$start) cbind(df, do.call(rbind.fill, lapply(x$doclist$docs, function(z) { data.frame(lapply(z, function(zz) { if (length(zz) > 1) { paste(zz, collapse = concat) } else { zz } }), stringsAsFactors = FALSE) }) )) })) } else { datout <- cbind(data.frame(numFound = dat[[1]]$doclist$numFound, start = dat[[1]]$doclist$start), do.call(rbind.fill, lapply(dat[[1]]$doclist$docs, data.frame, stringsAsFactors = FALSE))) } } else { if ('groups' %in% names(dat[[1]])) { datout <- lapply(dat, function(y) { y <- y$groups do.call(rbind.fill, lapply(y, function(x){ df <- data.frame( groupValue = ifelse(is.null(x$groupValue), "none", x$groupValue), numFound = x$doclist$numFound, start = x$doclist$start, stringsAsFactors = FALSE ) cbind(df, do.call(rbind.fill, lapply(x$doclist$docs, data.frame, stringsAsFactors = FALSE))) })) }) } else { datout <- do.call(rbind.fill, lapply(dat, function(x){ df <- data.frame( numFound = x$doclist$numFound, start = x$doclist$start, stringsAsFactors = FALSE ) cbind(df, do.call(rbind.fill, lapply(x$doclist$docs, data.frame, stringsAsFactors = FALSE))) })) } } } } else { datout <- input$grouped } } else { temp <- xml_find_all(input, '//lst[@name="grouped"]/lst') if (parsetype == 'df') { datout <- stats::setNames(lapply(temp, function(e){ tt <- xml_find_first(e, 'arr[@name="groups"]') dplyr::bind_rows(stats::setNames(lapply(xml_children(tt), function(f){ docc <- xml_find_all(f, 'result[@name="doclist"]/doc') df <- dplyr::bind_rows(lapply(docc, function(g){ as_data_frame(sapply(xml_children(g), nmtxt)) })) add_column( df, numFound = xml_attr(xml_find_first(f, "result"), "numFound"), start = xml_attr(xml_find_first(f, "result"), "start"), .before = TRUE ) }), vapply(xml_children(tt), function(z) xml_text(xml_find_first(z, "str")) %||% "", "")), .id = "group" ) }), xml_attr(temp, "name")) } else { datout <- stats::setNames(lapply(temp, function(e){ tt <- xml_find_first(e, 'arr[@name="groups"]') stats::setNames(lapply(xml_children(tt), function(f){ docc <- xml_find_all(f, 'result[@name="doclist"]/doc') lst <- lapply(docc, function(g){ sapply(xml_children(g), nmtxt) }) list( docs = lst, numFound = xml_attr(xml_find_first(f, "result"), "numFound"), start = xml_attr(xml_find_first(f, "result"), "start") ) }), vapply(xml_children(tt), function(z) xml_text(xml_find_first(z, "str")) %||% "", "")) }), xml_attr(temp, "name")) } } return( datout ) } # helper fxns --------------------- nmtxt <- function(x) { as.list(stats::setNames(xml2::xml_text(x), xml2::xml_attr(x, "name"))) } add_atts <- function(x, atts = NULL) { if (!is.null(atts)) { for (i in seq_along(atts)) { attr(x, names(atts)[i]) <- atts[[i]] } return(x) } else { return(x) } } parse_it <- function(x, wt) { switch( wt, xml = { xml2::read_xml(unclass(x)) }, json = { jsonlite::fromJSON(x, simplifyDataFrame = FALSE, simplifyMatrix = FALSE) }, csv = { tibble::as_data_frame( read.table(text = x, sep = ",", stringsAsFactors = FALSE, header = TRUE, fill = TRUE, comment.char = "") ) } ) } parse_ch <- function(x, parsetype, concat) { parsed <- cont_parse(x, attr(x, "wt")) structure(parsed, class = c(class(parsed), class(x))) } cont_parse <- function(x, wt) { structure(parse_it(x, wt), wt = wt) } # facet.pivot helpers -------------- #' Flatten facet.pivot responses #' #' Convert a nested hierarchy of facet.pivot elements #' to tabular data (rows and columns) #' #' @param df_w_pivot a \code{data.frame} with another #' \code{data.frame} nested inside representing a #' pivot reponse #' @return a 
\code{data.frame} #' #' @keywords internal pivot_flatten_tabular <- function(df_w_pivot){ # drop last column assumed to be named "pivot" parent <- df_w_pivot[head(names(df_w_pivot),-1)] pivot <- df_w_pivot$pivot pp <- list() for (i in 1:nrow(parent)) { if ((!is.null(pivot[[i]])) && (nrow(pivot[[i]]) > 0)) { # from parent drop last column assumed to be named "count" to not create duplicate columns of information pp[[i]] <- data.frame(cbind(parent[i,], pivot[[i]], row.names = NULL)) } } flattened_pivot <- do.call('rbind', pp) # return a tbl_df to flatten again if necessary return(list(parent = parent, flattened_pivot = flattened_pivot)) } #' Collapse Pivot Field and Value Columns #' #' Convert a table consisting of columns in sets of 3 #' into 2 columns assuming that the first column of every set of 3 #' (field) is duplicated throughout all rows and should be removed. #' This type of structure is usually returned by facet.pivot responses. #' #' @param data a \code{data.frame} with every 2 columns #' representing a field and value and the final representing #' a count #' @return a \code{data.frame} #' #' @keywords internal collapse_pivot_names <- function(data){ # shift field name to the column name to its right for (i in seq(1, ncol(data) - 1, by = 3)) { names(data)[i + 1] <- data[1, i] } # remove columns with duplicating information (anything named field) data <- data[-c(seq(1, ncol(data) - 1, by = 3))] # remove vestigial count columns if (ncol(data) > 2) { data <- data[-c(seq(0, ncol(data) - 1, by = 2))] } names(data)[length(data)] <- 'count' return(data) } solrium/R/add.R0000644000176200001440000000627513167507346013046 0ustar liggesusers#' Add documents from R objects #' #' @export #' @param x Documents, either as rows in a data.frame, or a list. #' @param conn A solrium connection object, see [SolrClient] #' @param name (character) A collection or core name. Required. #' @param commit (logical) If `TRUE`, documents immediately searchable. #' Default: `TRUE` #' @param commit_within (numeric) Milliseconds to commit the change, the #' document will be added within that time. Default: NULL #' @param overwrite (logical) Overwrite documents with matching keys. #' Default: `TRUE` #' @param boost (numeric) Boost factor. Default: NULL #' @param wt (character) One of json (default) or xml. If json, uses #' \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[xml2]{read_xml}} to #' parse #' @param raw (logical) If `TRUE`, returns raw data in format specified by #' \code{wt} param #' @param ... 
curl options passed on to [crul::HttpClient] #' #' @details Works for Collections as well as Cores (in SolrCloud and Standalone #' modes, respectively) #' #' @seealso \code{\link{update_json}}, \code{\link{update_xml}}, #' \code{\link{update_csv}} for adding documents from files #' #' @examples \dontrun{ #' (cli <- SolrClient$new()) #' #' # create the boooks collection #' if (!collection_exists(cli, "books")) { #' collection_create(cli, name = "books", numShards = 1) #' } #' #' # Documents in a list #' ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) #' add(ss, cli, name = "books") #' cli$get(c(1, 2), "books") #' #' # Documents in a data.frame #' ## Simple example #' df <- data.frame(id = c(67, 68), price = c(1000, 500000000)) #' add(df, cli, "books") #' df <- data.frame(id = c(77, 78), price = c(1, 2.40)) #' add(df, "books") #' #' ## More complex example, get file from package examples #' # start Solr in Schemaless mode first: bin/solr start -e schemaless #' file <- system.file("examples", "books.csv", package = "solrium") #' x <- read.csv(file, stringsAsFactors = FALSE) #' class(x) #' head(x) #' if (!collection_exists("mybooks")) { #' collection_create(name = "mybooks", numShards = 2) #' } #' add(x, "mybooks") #' #' # Use modifiers #' add(x, "mybooks", commit_within = 5000) #' #' # Get back XML instead of a list #' ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) #' # parsed XML #' add(ss, name = "books", wt = "xml") #' # raw XML #' add(ss, name = "books", wt = "xml", raw = TRUE) #' } add <- function(x, conn, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { UseMethod("add") } #' @export add.list <- function(x, conn, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { check_sr(conn) if (!is.null(boost)) { x <- lapply(x, function(z) modifyList(z, list(boost = boost))) } conn$add(x, name, commit, commit_within, overwrite, boost, wt, raw, ...) } #' @export add.data.frame <- function(x, conn, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...) { check_sr(conn) if (!is.null(boost)) x$boost <- boost x <- apply(x, 1, as.list) conn$add(x, name, commit, commit_within, overwrite, boost, wt, raw, ...) 
} solrium/R/content_types.R0000644000176200001440000000062513167507346015205 0ustar liggesusersctype_xml <- function() list(`Content-Type` = "application/xml") ctype_json <- function() list(`Content-Type` = "application/json") ctype_csv <- function() list(`Content-Type` = "application/csv") ctype <- function(x) list(`Content-Type` = x) get_ctype <- function(x) { switch(x, xml = ctype_xml(), json = ctype_json(), csv = ctype("application/csv; charset=utf-8") ) } solrium/R/core_status.R0000644000176200001440000000153113167507346014637 0ustar liggesusers#' Get core status #' #' @export #' #' @inheritParams core_create #' @param indexInfo (logical) #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' # you can create a new core like: bin/solr create -c corename #' # where is the name for your core - or creaate as below #' #' # connect #' (conn <- SolrClient$new()) #' #' # Status of all cores #' conn$core_status() #' #' # Status of particular cores #' conn$core_status("gettingstarted") #' #' # Get index info or not #' ## Default: TRUE #' conn$core_status("gettingstarted", indexInfo = TRUE) #' conn$core_status("gettingstarted", indexInfo = FALSE) #' } core_status <- function(conn, name = NULL, indexInfo = TRUE, raw = FALSE, callopts=list()) { conn$core_status(name, indexInfo, raw, callopts) } solrium/R/collection_addrole.R0000644000176200001440000000244713176211115016122 0ustar liggesusers#' @title Add a role to a node #' #' @description Assign a role to a given node in the cluster. The only supported role #' as of 4.7 is 'overseer' . Use this API to dedicate a particular node as Overseer. #' Invoke it multiple times to add more nodes. This is useful in large clusters where #' an Overseer is likely to get overloaded . If available, one among the list of #' nodes which are assigned the 'overseer' role would become the overseer. The #' system would assign the role to any other node if none of the designated nodes #' are up and running #' #' @export #' @param conn A solrium connection object, see [SolrClient] #' @param role (character) Required. The name of the role. The only supported role #' as of now is overseer (set as default). #' @param node (character) Required. The name of the node. It is possible to assign a #' role even before that node is started. #' @param raw (logical) If `TRUE`, returns raw data #' @param ... curl options passed on to [crul::HttpClient] #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # get list of nodes #' nodes <- conn$collection_clusterstatus()$cluster$live_nodes #' collection_addrole(conn, node = nodes[1]) #' } collection_addrole <- function(conn, role = "overseer", node, raw = FALSE, ...) { conn$collection_addrole(role, node, raw, ...) } solrium/R/core_mergeindexes.R0000644000176200001440000000257313167507346016002 0ustar liggesusers#' @title Merge indexes (cores) #' #' @description Merges one or more indexes to another index. The indexes must #' have completed commits, and should be locked against writes until the merge #' is complete or the resulting merged index may become corrupted. The target #' core index must already exist and have a compatible schema with the one or #' more indexes that will be merged to it. #' #' @export #' #' @inheritParams core_create #' @param indexDir (character) Multi-valued, directories that would be merged. #' @param srcCore (character) Multi-valued, source cores that would be merged. 
#' @param async (character) Request ID to track this action which will be #' processed asynchronously #' @examples \dontrun{ #' # start Solr with Schemaless mode via the schemaless eg: #' # bin/solr start -e schemaless #' #' # connect #' (conn <- SolrClient$new()) #' #' ## FIXME: not tested yet #' #' # use indexDir parameter #' conn$core_mergeindexes(core="new_core_name", #' indexDir = c("/solr_home/core1/data/index", #' "/solr_home/core2/data/index")) #' #' # use srcCore parameter #' conn$core_mergeindexes(name = "new_core_name", srcCore = c('core1', 'core2')) #' } core_mergeindexes <- function(conn, name, indexDir = NULL, srcCore = NULL, async = NULL, raw = FALSE, callopts = list()) { conn$core_mergeindexes(name, indexDir, srcCore, async, raw, callopts) } solrium/R/collection_addreplicaprop.R0000644000176200001440000000362013176211067017501 0ustar liggesusers#' @title Add a replica property #' #' @description Assign an arbitrary property to a particular replica and give it #' the value specified. If the property already exists, it will be overwritten #' with the new value. #' #' @export #' @inheritParams collection_create #' @param shard (character) Required. The name of the shard the replica #' belongs to #' @param replica (character) Required. The replica, e.g. core_node1. #' @param property (character) Required. The property to add. Note: this will #' have the literal 'property.' prepended to distinguish it from #' system-maintained properties. So these two forms are equivalent: #' `property=special` and `property=property.special` #' @param property.value (character) Required. The value to assign to #' the property #' @param shardUnique (logical) If `TRUE`, then setting this property in one #' replica will (1) remove the property from all other replicas in that shard #' Default: `FALSE` #' @examples \dontrun{ #' (conn <- SolrClient$new()) #' #' # create collection #' if (!conn$collection_exists("addrep")) { #' conn$collection_create(name = "addrep", numShards = 1) #' # OR bin/solr create -c addrep #' } #' #' # status #' conn$collection_clusterstatus()$cluster$collections$addrep$shards #' #' # add the value world to the property hello #' conn$collection_addreplicaprop(name = "addrep", shard = "shard1", #' replica = "core_node1", property = "hello", property.value = "world") #' #' # check status #' conn$collection_clusterstatus()$cluster$collections$addrep$shards #' conn$collection_clusterstatus()$cluster$collections$addrep$shards$shard1$replicas$core_node1 #' } collection_addreplicaprop <- function(conn, name, shard, replica, property, property.value, shardUnique = FALSE, raw = FALSE, callopts=list()) { conn$collection_addreplicaprop(name, shard, replica, property, property.value, shardUnique, raw, callopts) } solrium/vignettes/0000755000176200001440000000000013176475641013772 5ustar liggesuserssolrium/vignettes/cores_collections.Rmd0000644000176200001440000000365113055337615020146 0ustar liggesusers Cores/collections management ============================ ## Installation Stable version from CRAN ```r install.packages("solrium") ``` Or the development version from GitHub ```r install.packages("devtools") devtools::install_github("ropensci/solrium") ``` Load ```r library("solrium") ``` Initialize connection ```r solr_connect() ``` ``` #> #> url: http://localhost:8983 #> errors: simple #> verbose: TRUE #> proxy: ``` ## Cores There are many operations you can do on cores, including: * `core_create()` - create a core * `core_exists()` - check if a core exists * `core_mergeindexes()` - 
merge indexes * `core_reload()` - reload a core * `core_rename()` - rename a core * `core_requeststatus()` - check request status * `core_split()` - split a core * `core_status()` - check core status * `core_swap()` - core swap * `core_unload()` - delete a core ### Create a core ```r core_create() ``` ### Delete a core ```r core_unload() ``` ## Collections There are many operations you can do on collections, including: * `collection_addreplica()` * `collection_addreplicaprop()` * `collection_addrole()` * `collection_balanceshardunique()` * `collection_clusterprop()` * `collection_clusterstatus()` * `collection_create()` * `collection_createalias()` * `collection_createshard()` * `collection_delete()` * `collection_deletealias()` * `collection_deletereplica()` * `collection_deletereplicaprop()` * `collection_deleteshard()` * `collection_list()` * `collection_migrate()` * `collection_overseerstatus()` * `collection_rebalanceleaders()` * `collection_reload()` * `collection_removerole()` * `collection_requeststatus()` * `collection_splitshard()` ### Create a collection ```r collection_create() ``` ### Delete a collection ```r collection_delete() ``` solrium/vignettes/document_management.Rmd0000644000176200001440000001256713055337615020455 0ustar liggesusers Document management =================== ## Installation Stable version from CRAN ```r install.packages("solrium") ``` Or the development version from GitHub ```r install.packages("devtools") devtools::install_github("ropensci/solrium") ``` Load ```r library("solrium") ``` Initialize connection. By default, you connect to `http://localhost:8983` ```r solr_connect() ``` ``` #> #> url: http://localhost:8983 #> errors: simple #> verbose: TRUE #> proxy: ``` ## Create documents from R objects For now, only lists and data.frame's supported. 
### data.frame ```r df <- data.frame(id = c(67, 68), price = c(1000, 500000000)) add(df, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 112 ``` ### list ```r ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) add(ss, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 16 ``` ## Delete documents ### By id Add some documents first ```r docs <- list(list(id = 1, price = 100, name = "brown"), list(id = 2, price = 500, name = "blue"), list(id = 3, price = 2000L, name = "pink")) add(docs, "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 18 ``` And the documents are now in your Solr database ```r tail(solr_search(name = "gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [3 x 4] #> #> id price name _version_ #> (chr) (int) (chr) (dbl) #> 1 1 100 brown 1.525729e+18 #> 2 2 500 blue 1.525729e+18 #> 3 3 2000 pink 1.525729e+18 ``` Now delete those documents just added ```r delete_by_id(ids = c(1, 2, 3), "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 24 ``` And now they are gone ```r tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [0 x 0] ``` ### By query Add some documents first ```r add(docs, "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 19 ``` And the documents are now in your Solr database ```r tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [3 x 4] #> #> id price name _version_ #> (chr) (int) (chr) (dbl) #> 1 1 100 brown 1.525729e+18 #> 2 2 500 blue 1.525729e+18 #> 3 3 2000 pink 1.525729e+18 ``` Now delete those documents just added ```r delete_by_query(query = "(name:blue OR name:pink)", "gettingstarted") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 12 ``` And now they are gone ```r tail(solr_search("gettingstarted", "*:*", base = "http://localhost:8983/solr/select", rows = 100)) ``` ``` #> Source: local data frame [1 x 4] #> #> id price name _version_ #> (chr) (int) (chr) (dbl) #> 1 1 100 brown 1.525729e+18 ``` ## Update documents from files This approach is best if you have many different things you want to do at once, e.g., delete and add files and set any additional options. The functions are: * `update_xml()` * `update_json()` * `update_csv()` There are separate functions for each of the data types as they take slightly different parameters - and to make it more clear that those are the three input options for data types. 
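As a quick sketch of the CSV path (by analogy with the JSON and XML calls below, and assuming the `books` collection created earlier plus the `books.csv` file shipped with the package; output omitted):

```r
file <- system.file("examples", "books.csv", package = "solrium")
update_csv(file, "books")
```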
### JSON ```r file <- system.file("examples", "books.json", package = "solrium") update_json(file, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 39 ``` ### Add and delete in the same file Add a document first, that we can later delete ```r ss <- list(list(id = 456, name = "cat")) add(ss, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 19 ``` Now add a new document, and delete the one we just made ```r file <- system.file("examples", "add_delete.xml", package = "solrium") cat(readLines(file), sep = "\n") ``` ``` #> #> #> #> 978-0641723445 #> book,hardcover #> The Lightning Thief #> Rick Riordan #> Percy Jackson and the Olympians #> 1 #> fantasy #> TRUE #> 12.5 #> 384 #> #> #> #> 456 #> #> ``` ```r update_xml(file, "books") ``` ``` #> $responseHeader #> $responseHeader$status #> [1] 0 #> #> $responseHeader$QTime #> [1] 23 ``` ### Notes Note that `update_xml()` and `update_json()` have exactly the same parameters, but simply use different data input formats. `update_csv()` is different in that you can't provide document or field level boosts or other modifications. In addition `update_csv()` can accept not just csv, but tsv and other types of separators. solrium/vignettes/local_setup.Rmd0000644000176200001440000001303213176457375016753 0ustar liggesusers Local Solr setup ====== ### OSX __Based on http://lucene.apache.org/solr/quickstart.html__ 1. Download most recent version from an Apache mirror http://www.apache.org/dyn/closer.cgi/lucene/solr/5.4.1 2. Unzip/untar the file. Move to your desired location. Now you have Solr `v.5.4.1` 3. Go into the directory you just created: `cd solr-5.4.1` 4. Launch Solr: `bin/solr start -e cloud -noprompt` - Sets up SolrCloud mode, rather than Standalone mode. As far as I can tell, SolrCloud mode seems more common. 5. Once Step 4 completes, you can go to `http://localhost:8983/solr/` now, which is the admin interface for Solr. 6. Load some documents: `bin/post -c gettingstarted docs/` 7. Once Step 6 is complete (will take a few minutes), navigate in your browser to `http://localhost:8983/solr/gettingstarted/select?q=*:*&wt=json` and you should see a bunch of documents ### Linux > You should be able to use the above instructions for OSX on a Linux machine. #### Linuxbrew [Linuxbrew](http://linuxbrew.sh/) is a port of Mac OS homebrew to linux. Operation is essentially the same as for homebrew. Follow the installation instructions for linuxbrew and then the instructions for using homebrew (above) should work without modification. ### Windows You should be able to use the above instructions for OSX on a Windows machine, but with some slight differences. For example, the `bin/post` tool for OSX and Linux doesn't work on Windows, but see https://cwiki.apache.org/confluence/display/solr/Post+Tool#PostTool-Windows for an equivalent. 
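On Windows the equivalent is to call the post tool jar directly with Java; per the page linked above, an invocation along the lines of `java -Dc=gettingstarted -jar example\exampledocs\post.jar example\exampledocs\*.xml` indexes the bundled example documents into the `gettingstarted` collection. The exact command can vary by Solr version, so confirm it against that page before relying on it.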
### `solrium` usage And we can now use the `solrium` R package to query the Solr database to get raw JSON data: ```r solr_connect('http://localhost:8983') solr_search("gettingstarted", q = '*:*', raw = TRUE, rows = 3) #> [1] "{\"responseHeader\":{\"status\":0,\"QTime\":8,\"params\":{\"q\":\"*:*\",\"rows\":\"3\",\"wt\":\"json\"}},\"response\":{\"numFound\":3577,\"start\":0,\"maxScore\":1.0,\"docs\":[{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmenter.html\",\"stream_size\":[9016],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmenter.html\"],\"title\":[\"Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023127863296},{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmentsBuilder.html\",\"stream_size\":[10336],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/highlight/class-use/SolrFragmentsBuilder.html\"],\"title\":[\"Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023153029120},{\"id\":\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/internal/csv/CSVParser.html\",\"stream_size\":[32427],\"date\":[\"2015-06-10T00:00:00Z\"],\"x_parsed_by\":[\"org.apache.tika.parser.DefaultParser\",\"org.apache.tika.parser.html.HtmlParser\"],\"stream_content_type\":[\"text/html\"],\"dc_title\":[\"CSVParser (Solr 5.2.1 API)\"],\"content_encoding\":[\"UTF-8\"],\"resourcename\":[\"/Users/sacmac/solr-5.2.1/docs/solr-core/org/apache/solr/internal/csv/CSVParser.html\"],\"title\":[\"CSVParser (Solr 5.2.1 API)\"],\"content_type\":[\"text/html\"],\"_version_\":1507965023221186560}]}}\n" #> attr(,"class") #> [1] "sr_search" #> attr(,"wt") #> [1] "json" ``` Or parsed data to a data.frame (just looking at a few columns for brevity): ```r solr_search("gettingstarted", q = '*:*', fl = c('date', 'title')) #> Source: local data frame [10 x 2] #> #> date title #> 1 2015-06-10T00:00:00Z Uses of Interface org.apache.solr.highlight.SolrFragmenter (Solr 5.2.1 API) #> 2 2015-06-10T00:00:00Z Uses of Class org.apache.solr.highlight.SolrFragmentsBuilder (Solr 5.2.1 API) #> 3 2015-06-10T00:00:00Z CSVParser (Solr 5.2.1 API) #> 4 2015-06-10T00:00:00Z CSVUtils (Solr 5.2.1 API) #> 5 2015-06-10T00:00:00Z org.apache.solr.internal.csv (Solr 5.2.1 API) #> 6 2015-06-10T00:00:00Z org.apache.solr.internal.csv Class Hierarchy (Solr 5.2.1 API) #> 7 2015-06-10T00:00:00Z Uses of Class org.apache.solr.internal.csv.CSVStrategy (Solr 5.2.1 API) #> 8 2015-06-10T00:00:00Z Uses of Class org.apache.solr.internal.csv.CSVUtils (Solr 5.2.1 API) #> 9 2015-06-10T00:00:00Z CSVConfig (Solr 5.2.1 API) #> 10 2015-06-10T00:00:00Z CSVConfigGuesser (Solr 5.2.1 API) ``` See the other vignettes for more thorough 
examples: * `Document management` * `Cores/collections management` * `Solr Search` solrium/vignettes/search.Rmd0000644000176200001440000005047613055337615015705 0ustar liggesusers Solr search =========== **A general purpose R interface to [Apache Solr](http://lucene.apache.org/solr/)** ## Solr info + [Solr home page](http://lucene.apache.org/solr/) + [Highlighting help](http://wiki.apache.org/solr/HighlightingParameters) + [Faceting help](http://wiki.apache.org/solr/SimpleFacetParameters) + [Install and Setup SOLR in OSX, including running Solr](http://risnandar.wordpress.com/2013/09/08/how-to-install-and-setup-apache-lucene-solr-in-osx/) ## Installation Stable version from CRAN ```r install.packages("solrium") ``` Or the development version from GitHub ```r install.packages("devtools") devtools::install_github("ropensci/solrium") ``` Load ```r library("solrium") ``` ## Setup connection You can set up a connection to a remote Solr instance or to one on your local machine. ```r solr_connect('http://api.plos.org/search') #> #> url: http://api.plos.org/search #> errors: simple #> verbose: TRUE #> proxy: ``` ## Rundown `solr_search()` only returns the `docs` element of a Solr response body. If `docs` is all you need, then this function will do the job. If you need facet data only, or mlt data only, see the appropriate functions for each of those below. Another function, `solr_all()`, has a similar interface in terms of parameters to `solr_search()`, but returns all parts of the response body, including facets, mlt, groups, stats, etc., as long as you request those. ## Search docs `solr_search()` returns only docs. A basic search: ```r solr_search(q = '*:*', rows = 2, fl = 'id') #> Source: local data frame [2 x 1] #> #> id #> (chr) #> 1 10.1371/journal.pone.0142243/references #> 2 10.1371/journal.pone.0142243/body ``` __Search in specific fields with `:`__ Search for the word "ecology" in the title and the word "cell" in the body ```r solr_search(q = 'title:"ecology" AND body:"cell"', fl = 'title', rows = 5) #> Source: local data frame [5 x 1] #> #> title #> (chr) #> 1 The Ecology of Collective Behavior #> 2 Ecology's Big, Hot Idea #> 3 Spatial Ecology of Bacteria at the Microscale in Soil #> 4 Biofilm Formation As a Response to Ecological Competition #> 5 Ecology of Root Colonizing Massilia (Oxalobacteraceae) ``` __Wildcards__ Search for words that start with "cell" in the title field ```r solr_search(q = 'title:"cell*"', fl = 'title', rows = 5) #> Source: local data frame [5 x 1] #> #> title #> (chr) #> 1 Tumor Cell Recognition Efficiency by T Cells #> 2 Cancer Stem Cell-Like Side Population Cells in Clear Cell Renal Cell Carcin #> 3 Dcas Supports Cell Polarization and Cell-Cell Adhesion Complexes in Develop #> 4 Cell-Cell Contact Preserves Cell Viability via Plakoglobin #> 5 MS4a4B, a CD20 Homologue in T Cells, Inhibits T Cell Propagation by Modulat ``` __Proximity search__ Search for the phrase "stem cell", allowing the two terms to be up to seven words apart ```r solr_search(q = 'everything:"stem cell"~7', fl = 'title', rows = 3) #> Source: local data frame [3 x 1] #> #> title #> (chr) #> 1 Correction: Reduced Intensity Conditioning, Combined Transplantation of Hap #> 2 A Recipe for Self-Renewing Brain #> 3 Gene Expression Profile Created for Mouse Stem Cells and Developing Embryo ``` __Range searches__ Search for articles with a Twitter count between 5 and 50 ```r solr_search(q = '*:*', fl = c('alm_twitterCount', 'id'), fq = 'alm_twitterCount:[5 TO 50]', rows = 10) #> Source: local data frame [10 x 2] #> #> id alm_twitterCount #> (chr) (int) #> 1 
10.1371/journal.ppat.1005403/introduction 6 #> 2 10.1371/journal.ppat.1005403/results_and_discussion 6 #> 3 10.1371/journal.ppat.1005403/materials_and_methods 6 #> 4 10.1371/journal.ppat.1005403/supporting_information 6 #> 5 10.1371/journal.ppat.1005401 6 #> 6 10.1371/journal.ppat.1005401/title 6 #> 7 10.1371/journal.ppat.1005401/abstract 6 #> 8 10.1371/journal.ppat.1005401/references 6 #> 9 10.1371/journal.ppat.1005401/body 6 #> 10 10.1371/journal.ppat.1005401/introduction 6 ``` __Boosts__ Assign higher boost to title matches than to body matches (compare the two calls) ```r solr_search(q = 'title:"cell" abstract:"science"', fl = 'title', rows = 3) #> Source: local data frame [3 x 1] #> #> title #> (chr) #> 1 I Want More and Better Cells! – An Outreach Project about Stem Cells and It #> 2 Centre of the Cell: Science Comes to Life #> 3 Globalization of Stem Cell Science: An Examination of Current and Past Coll ``` ```r solr_search(q = 'title:"cell"^1.5 AND abstract:"science"', fl = 'title', rows = 3) #> Source: local data frame [3 x 1] #> #> title #> (chr) #> 1 Centre of the Cell: Science Comes to Life #> 2 I Want More and Better Cells! – An Outreach Project about Stem Cells and It #> 3 Derivation of Hair-Inducing Cell from Human Pluripotent Stem Cells ``` ## Search all `solr_all()` differs from `solr_search()` in that it allows specifying facets, mlt, groups, stats, etc, and returns all of those. It defaults to `parsetype = "list"` and `wt="json"`, whereas `solr_search()` defaults to `parsetype = "df"` and `wt="csv"`. `solr_all()` returns by default a list, whereas `solr_search()` by default returns a data.frame. A basic search, just docs output ```r solr_all(q = '*:*', rows = 2, fl = 'id') #> $response #> $response$numFound #> [1] 1502814 #> #> $response$start #> [1] 0 #> #> $response$docs #> $response$docs[[1]] #> $response$docs[[1]]$id #> [1] "10.1371/journal.pone.0142243/references" #> #> #> $response$docs[[2]] #> $response$docs[[2]]$id #> [1] "10.1371/journal.pone.0142243/body" ``` Get docs, mlt, and stats output ```r solr_all(q = 'ecology', rows = 2, fl = 'id', mlt = 'true', mlt.count = 2, mlt.fl = 'abstract', stats = 'true', stats.field = 'counter_total_all') #> $response #> $response$numFound #> [1] 31467 #> #> $response$start #> [1] 0 #> #> $response$docs #> $response$docs[[1]] #> $response$docs[[1]]$id #> [1] "10.1371/journal.pone.0059813" #> #> #> $response$docs[[2]] #> $response$docs[[2]]$id #> [1] "10.1371/journal.pone.0001248" #> #> #> #> #> $moreLikeThis #> $moreLikeThis$`10.1371/journal.pone.0059813` #> $moreLikeThis$`10.1371/journal.pone.0059813`$numFound #> [1] 152704 #> #> $moreLikeThis$`10.1371/journal.pone.0059813`$start #> [1] 0 #> #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[1]] #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[1]]$id #> [1] "10.1371/journal.pone.0111996" #> #> #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[2]] #> $moreLikeThis$`10.1371/journal.pone.0059813`$docs[[2]]$id #> [1] "10.1371/journal.pone.0143687" #> #> #> #> #> $moreLikeThis$`10.1371/journal.pone.0001248` #> $moreLikeThis$`10.1371/journal.pone.0001248`$numFound #> [1] 159058 #> #> $moreLikeThis$`10.1371/journal.pone.0001248`$start #> [1] 0 #> #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[1]] #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[1]]$id #> [1] "10.1371/journal.pone.0001275" #> #> #> $moreLikeThis$`10.1371/journal.pone.0001248`$docs[[2]] #> 
$moreLikeThis$`10.1371/journal.pone.0001248`$docs[[2]]$id #> [1] "10.1371/journal.pone.0024192" #> #> #> #> #> #> $stats #> $stats$stats_fields #> $stats$stats_fields$counter_total_all #> $stats$stats_fields$counter_total_all$min #> [1] 16 #> #> $stats$stats_fields$counter_total_all$max #> [1] 367697 #> #> $stats$stats_fields$counter_total_all$count #> [1] 31467 #> #> $stats$stats_fields$counter_total_all$missing #> [1] 0 #> #> $stats$stats_fields$counter_total_all$sum #> [1] 141552408 #> #> $stats$stats_fields$counter_total_all$sumOfSquares #> [1] 3.162032e+12 #> #> $stats$stats_fields$counter_total_all$mean #> [1] 4498.44 #> #> $stats$stats_fields$counter_total_all$stddev #> [1] 8958.45 #> #> $stats$stats_fields$counter_total_all$facets #> named list() ``` ## Facet ```r solr_facet(q = '*:*', facet.field = 'journal', facet.query = c('cell', 'bird')) #> $facet_queries #> term value #> 1 cell 128657 #> 2 bird 13063 #> #> $facet_fields #> $facet_fields$journal #> X1 X2 #> 1 plos one 1233662 #> 2 plos genetics 49285 #> 3 plos pathogens 42817 #> 4 plos computational biology 36373 #> 5 plos neglected tropical diseases 33911 #> 6 plos biology 28745 #> 7 plos medicine 19934 #> 8 plos clinical trials 521 #> 9 plos medicin 9 #> #> #> $facet_pivot #> NULL #> #> $facet_dates #> NULL #> #> $facet_ranges #> NULL ``` ## Highlight ```r solr_highlight(q = 'alcohol', hl.fl = 'abstract', rows = 2) #> $`10.1371/journal.pmed.0040151` #> $`10.1371/journal.pmed.0040151`$abstract #> [1] "Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting" #> #> #> $`10.1371/journal.pone.0027752` #> $`10.1371/journal.pone.0027752`$abstract #> [1] "Background: The negative influences of alcohol on TB management with regard to delays in seeking" ``` ## Stats ```r out <- solr_stats(q = 'ecology', stats.field = c('counter_total_all', 'alm_twitterCount'), stats.facet = c('journal', 'volume')) ``` ```r out$data #> min max count missing sum sumOfSquares #> counter_total_all 16 367697 31467 0 141552408 3.162032e+12 #> alm_twitterCount 0 1756 31467 0 168586 3.267801e+07 #> mean stddev #> counter_total_all 4498.439889 8958.45030 #> alm_twitterCount 5.357549 31.77757 ``` ```r out$facet #> $counter_total_all #> $counter_total_all$volume #> min max count missing sum sumOfSquares mean stddev #> 1 20 166202 887 0 2645927 63864880371 2983.007 7948.200 #> 2 495 103147 105 0 1017325 23587444387 9688.810 11490.287 #> 3 1950 69628 69 0 704216 13763808310 10206.029 9834.333 #> 4 742 13856 9 0 48373 375236903 5374.778 3795.438 #> 5 1871 182622 81 0 1509647 87261688837 18637.617 27185.811 #> 6 1667 117922 482 0 5836186 162503606896 12108.270 13817.754 #> 7 1340 128083 741 0 7714963 188647618509 10411.556 12098.852 #> 8 667 362410 1010 0 9692492 340237069126 9596.527 15653.040 #> 9 103 113220 1539 0 12095764 218958657256 7859.496 8975.188 #> 10 72 243873 2948 0 17699332 327210596846 6003.844 8658.717 #> 11 51 184259 4825 0 24198104 382922818910 5015.151 7363.541 #> 12 16 367697 6360 0 26374352 533183277470 4146.911 8163.790 #> 13 42 287741 6620 0 21003701 612616254755 3172.765 9082.194 #> 14 128 161520 5791 0 11012026 206899109466 1901.576 5667.209 #> volume #> 1 11 #> 2 12 #> 3 13 #> 4 14 #> 5 1 #> 6 2 #> 7 3 #> 8 4 #> 9 5 #> 10 6 #> 11 7 #> 12 8 #> 13 9 #> 14 10 #> #> $counter_total_all$journal #> min max count missing sum sumOfSquares mean stddev #> 1 667 117922 243 0 4074303 1.460258e+11 16766.679 17920.074 #> 2 742 265561 884 0 14006081 5.507548e+11 15843.983 19298.065 #> 3 8463 13797 2 0 22260 
2.619796e+08 11130.000 3771.708 #> 4 16 367697 25915 0 96069530 1.943903e+12 3707.101 7827.546 #> 5 915 61956 595 0 4788553 6.579963e+10 8047.988 6774.558 #> 6 548 76290 758 0 6326284 9.168443e+10 8346.021 7167.106 #> 7 268 212048 1239 0 5876481 1.010080e+11 4742.923 7686.101 #> 8 495 287741 580 0 4211717 1.411022e+11 7261.581 13815.867 #> journal #> 1 plos medicine #> 2 plos biology #> 3 plos clinical trials #> 4 plos one #> 5 plos pathogens #> 6 plos genetics #> 7 plos neglected tropical diseases #> 8 plos computational biology #> #> #> $alm_twitterCount #> $alm_twitterCount$volume #> min max count missing sum sumOfSquares mean stddev volume #> 1 0 1756 887 0 12295 4040629 13.861330 66.092178 11 #> 2 0 1045 105 0 6466 1885054 61.580952 119.569402 12 #> 3 0 283 69 0 3478 509732 50.405797 70.128101 13 #> 4 6 274 9 0 647 102391 71.888889 83.575482 14 #> 5 0 42 81 0 176 4996 2.172840 7.594060 1 #> 6 0 74 482 0 628 15812 1.302905 5.583197 2 #> 7 0 48 741 0 652 11036 0.879892 3.760087 3 #> 8 0 239 1010 0 1039 74993 1.028713 8.559485 4 #> 9 0 126 1539 0 1901 90297 1.235218 7.562004 5 #> 10 0 886 2948 0 4357 1245453 1.477951 20.504442 6 #> 11 0 822 4825 0 19646 2037596 4.071710 20.144602 7 #> 12 0 1503 6360 0 35938 6505618 5.650629 31.482092 8 #> 13 0 1539 6620 0 49837 12847207 7.528248 43.408246 9 #> 14 0 863 5791 0 31526 3307198 5.443965 23.271216 10 #> #> $alm_twitterCount$journal #> min max count missing sum sumOfSquares mean stddev #> 1 0 777 243 0 4251 1028595 17.493827 62.79406 #> 2 0 1756 884 0 16405 6088729 18.557692 80.93655 #> 3 0 3 2 0 3 9 1.500000 2.12132 #> 4 0 1539 25915 0 123409 23521391 4.762068 29.74883 #> 5 0 122 595 0 4265 160581 7.168067 14.79428 #> 6 0 178 758 0 4277 148277 5.642480 12.80605 #> 7 0 886 1239 0 4972 1048908 4.012914 28.82956 #> 8 0 285 580 0 4166 265578 7.182759 20.17431 #> journal #> 1 plos medicine #> 2 plos biology #> 3 plos clinical trials #> 4 plos one #> 5 plos pathogens #> 6 plos genetics #> 7 plos neglected tropical diseases #> 8 plos computational biology ``` ## More like this `solr_mlt` is a function to return similar documents to the one ```r out <- solr_mlt(q = 'title:"ecology" AND body:"cell"', mlt.fl = 'title', mlt.mindf = 1, mlt.mintf = 1, fl = 'counter_total_all', rows = 5) out$docs #> Source: local data frame [5 x 2] #> #> id counter_total_all #> (chr) (int) #> 1 10.1371/journal.pbio.1001805 17081 #> 2 10.1371/journal.pbio.0020440 23882 #> 3 10.1371/journal.pone.0087217 5935 #> 4 10.1371/journal.pbio.1002191 13036 #> 5 10.1371/journal.pone.0040117 4316 ``` ```r out$mlt #> $`10.1371/journal.pbio.1001805` #> id counter_total_all #> 1 10.1371/journal.pone.0082578 2196 #> 2 10.1371/journal.pone.0098876 2448 #> 3 10.1371/journal.pone.0102159 1177 #> 4 10.1371/journal.pcbi.1002652 3102 #> 5 10.1371/journal.pcbi.1003408 6942 #> #> $`10.1371/journal.pbio.0020440` #> id counter_total_all #> 1 10.1371/journal.pone.0102679 3112 #> 2 10.1371/journal.pone.0035964 5571 #> 3 10.1371/journal.pone.0003259 2800 #> 4 10.1371/journal.pntd.0003377 3392 #> 5 10.1371/journal.pone.0068814 7522 #> #> $`10.1371/journal.pone.0087217` #> id counter_total_all #> 1 10.1371/journal.pone.0131665 409 #> 2 10.1371/journal.pcbi.0020092 19604 #> 3 10.1371/journal.pone.0133941 475 #> 4 10.1371/journal.pone.0123774 997 #> 5 10.1371/journal.pone.0140306 322 #> #> $`10.1371/journal.pbio.1002191` #> id counter_total_all #> 1 10.1371/journal.pbio.1002232 1950 #> 2 10.1371/journal.pone.0131700 979 #> 3 10.1371/journal.pone.0070448 1608 #> 4 10.1371/journal.pone.0028737 7481 #> 5 
 10.1371/journal.pone.0052330 5595 #> #> $`10.1371/journal.pone.0040117` #> id counter_total_all #> 1 10.1371/journal.pone.0069352 2763 #> 2 10.1371/journal.pone.0148280 467 #> 3 10.1371/journal.pone.0035502 4031 #> 4 10.1371/journal.pone.0014065 5764 #> 5 10.1371/journal.pone.0113280 1984 ``` ## Groups `solr_group()` is a function to group search results by a field or query; here we group articles by journal ```r solr_group(q = 'ecology', group.field = 'journal', group.limit = 1, fl = c('id', 'alm_twitterCount')) #> groupValue numFound start #> 1 plos one 25915 0 #> 2 plos computational biology 580 0 #> 3 plos biology 884 0 #> 4 none 1251 0 #> 5 plos medicine 243 0 #> 6 plos neglected tropical diseases 1239 0 #> 7 plos pathogens 595 0 #> 8 plos genetics 758 0 #> 9 plos clinical trials 2 0 #> id alm_twitterCount #> 1 10.1371/journal.pone.0059813 56 #> 2 10.1371/journal.pcbi.1003594 21 #> 3 10.1371/journal.pbio.1002358 16 #> 4 10.1371/journal.pone.0046671 2 #> 5 10.1371/journal.pmed.1000303 0 #> 6 10.1371/journal.pntd.0002577 2 #> 7 10.1371/journal.ppat.1003372 2 #> 8 10.1371/journal.pgen.1001197 0 #> 9 10.1371/journal.pctr.0020010 0 ``` ## Parsing `solr_parse()` is a general purpose parser function with extension methods for parsing outputs from functions in `solr`. `solr_parse()` is used internally within functions to do parsing after retrieving data from the server. You can optionally get back raw `json`, `xml`, or `csv` with `raw=TRUE`, and then parse afterwards with `solr_parse()`. For example: ```r (out <- solr_highlight(q = 'alcohol', hl.fl = 'abstract', rows = 2, raw = TRUE)) #> [1] "{\"response\":{\"numFound\":20268,\"start\":0,\"docs\":[{},{}]},\"highlighting\":{\"10.1371/journal.pmed.0040151\":{\"abstract\":[\"Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting\"]},\"10.1371/journal.pone.0027752\":{\"abstract\":[\"Background: The negative influences of alcohol on TB management with regard to delays in seeking\"]}}}\n" #> attr(,"class") #> [1] "sr_high" #> attr(,"wt") #> [1] "json" ``` Then parse ```r solr_parse(out, 'df') #> names #> 1 10.1371/journal.pmed.0040151 #> 2 10.1371/journal.pone.0027752 #> abstract #> 1 Background: Alcohol consumption causes an estimated 4% of the global disease burden, prompting #> 2 Background: The negative influences of alcohol on TB management with regard to delays in seeking ``` [Please report any issues or bugs](https://github.com/ropensci/solrium/issues). solrium/README.md0000644000176200001440000005701113176464433013242 0ustar liggesuserssolrium ======= [![Build Status](https://travis-ci.org/ropensci/solrium.svg?branch=master)](https://travis-ci.org/ropensci/solrium) [![codecov.io](https://codecov.io/github/ropensci/solrium/coverage.svg?branch=master)](https://codecov.io/github/ropensci/solrium?branch=master) [![rstudio mirror downloads](https://cranlogs.r-pkg.org/badges/solrium?color=2ED968)](https://github.com/metacran/cranlogs.app) [![cran version](https://www.r-pkg.org/badges/version/solrium)](https://cran.r-project.org/package=solrium) **A general purpose R interface to [Solr](https://lucene.apache.org/solr/)** Development now follows Solr v7 and greater, which introduced many changes; as a result, many functions here may not work with Solr installations older than v7. Be aware that currently some functions will only work in certain Solr modes, e.g., `collection_create()` won't work when you are not in SolrCloud mode. But you should get an error message stating that you aren't. 
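If you are not sure which mode your Solr instance is running in, one quick (if blunt) check is to try a collections call and see whether it errors. A minimal sketch, assuming a local Solr running on the default port:

```r
library("solrium")
conn <- SolrClient$new()
res <- tryCatch(conn$collection_list(), error = function(e) e)
if (inherits(res, "error")) {
  # collections API unavailable: likely standalone mode, so use the core_* functions
  message("standalone mode? use core_* functions")
} else {
  # collections API responded: SolrCloud mode, collection_* functions should work
  message("SolrCloud mode: collection_* functions available")
}
```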
> Currently developing against Solr `v7.0.0` > Note that we recently changed the package name to `solrium`. A previous version of this package is on CRAN as `solr`, but next version will be up as `solrium`. ## Solr info + [Solr home page](http://lucene.apache.org/solr/) + [Highlighting help](https://lucene.apache.org/solr/guide/7_0/highlighting.html) + [Faceting help](http://wiki.apache.org/solr/SimpleFacetParameters) + [Solr stats](http://wiki.apache.org/solr/StatsComponent) + ['More like this' searches](http://wiki.apache.org/solr/MoreLikeThis) + [Grouping/Feild collapsing](http://wiki.apache.org/solr/FieldCollapsing) + [Install and Setup SOLR in OSX, including running Solr](http://risnandar.wordpress.com/2013/09/08/how-to-install-and-setup-apache-lucene-solr-in-osx/) + [Solr csv writer](https://lucene.apache.org/solr/guide/7_0/response-writers.html#ResponseWriters-CSVResponseWriter) ## Package API and ways of using the package The first thing to look at is `SolrClient` to instantiate a client connection to your Solr instance. `ping` and `schema` are helpful functions to look at after instantiating your client. There are two ways to use `solrium`: 1. Call functions on the `SolrClient` object 2. Pass the `SolrClient` object to functions For example, if we instantiate a client like `conn <- SolrClient$new()`, then to use the first way we can do `conn$search(...)`, and the second way by doing `solr_search(conn, ...)`. These two ways of using the package hopefully make the package more user friendly for more people, those that prefer a more object oriented approach, and those that prefer more of a functional approach. **Collections** Functions that start with `collection` work with Solr collections when in cloud mode. Note that these functions won't work when in Solr standard mode **Cores** Functions that start with `core` work with Solr cores when in standard Solr mode. Note that these functions won't work when in Solr cloud mode **Documents** The following functions work with documents in Solr ``` #> - add #> - delete_by_id #> - delete_by_query #> - update_atomic_json #> - update_atomic_xml #> - update_csv #> - update_json #> - update_xml ``` **Search** Search functions, including `solr_parse` for parsing results from different functions appropriately ``` #> - solr_all #> - solr_facet #> - solr_get #> - solr_group #> - solr_highlight #> - solr_mlt #> - solr_parse #> - solr_search #> - solr_stats ``` ## Install Stable version from CRAN ```r install.packages("solrium") ``` Or development version from GitHub ```r devtools::install_github("ropensci/solrium") ``` ```r library("solrium") ``` ## Setup Use `SolrClient$new()` to initialize your connection. These examples use a remote Solr server, but work on any local Solr server. ```r (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) #> #> host: api.plos.org #> path: search #> port: #> scheme: http #> errors: simple #> proxy: ``` You can also set whether you want simple or detailed error messages (via `errors`), and whether you want URLs used in each function call or not (via `verbose`), and your proxy settings (via `proxy`) if needed. 
For example: ```r SolrClient$new(errors = "complete") ``` Your settings are printed in the print method for the connection object ```r cli #> #> host: api.plos.org #> path: search #> port: #> scheme: http #> errors: simple #> proxy: ``` For local Solr server setup: ``` bin/solr start -e cloud -noprompt bin/post -c gettingstarted example/exampledocs/*.xml ``` ## Search ```r cli$search(params = list(q='*:*', rows=2, fl='id')) #> # A tibble: 2 x 1 #> id #> #> 1 10.1371/journal.pone.0079536/title #> 2 10.1371/journal.pone.0079536/abstract ``` ### Search grouped data Most recent publication by journal ```r cli$group(params = list(q='*:*', group.field='journal', rows=5, group.limit=1, group.sort='publication_date desc', fl='publication_date, score')) #> groupValue numFound start publication_date #> 1 plos one 1572163 0 2017-11-01T00:00:00Z #> 2 plos neglected tropical diseases 47510 0 2017-11-01T00:00:00Z #> 3 plos genetics 59871 0 2017-11-01T00:00:00Z #> 4 plos pathogens 53246 0 2017-11-01T00:00:00Z #> 5 none 63561 0 2012-10-23T00:00:00Z #> score #> 1 1 #> 2 1 #> 3 1 #> 4 1 #> 5 1 ``` First publication by journal ```r cli$group(params = list(q = '*:*', group.field = 'journal', group.limit = 1, group.sort = 'publication_date asc', fl = c('publication_date', 'score'), fq = "publication_date:[1900-01-01T00:00:00Z TO *]")) #> groupValue numFound start publication_date #> 1 plos one 1572163 0 2006-12-20T00:00:00Z #> 2 plos neglected tropical diseases 47510 0 2007-08-30T00:00:00Z #> 3 plos pathogens 53246 0 2005-07-22T00:00:00Z #> 4 plos computational biology 45582 0 2005-06-24T00:00:00Z #> 5 none 57532 0 2005-08-23T00:00:00Z #> 6 plos clinical trials 521 0 2006-04-21T00:00:00Z #> 7 plos genetics 59871 0 2005-06-17T00:00:00Z #> 8 plos medicine 23519 0 2004-09-07T00:00:00Z #> 9 plos medicin 9 0 2012-04-17T00:00:00Z #> 10 plos biology 32513 0 2003-08-18T00:00:00Z #> score #> 1 1 #> 2 1 #> 3 1 #> 4 1 #> 5 1 #> 6 1 #> 7 1 #> 8 1 #> 9 1 #> 10 1 ``` Search group query : Last 3 publications of 2013. 
```r gq <- 'publication_date:[2013-01-01T00:00:00Z TO 2013-12-31T00:00:00Z]' cli$group( params = list(q='*:*', group.query = gq, group.limit = 3, group.sort = 'publication_date desc', fl = 'publication_date')) #> numFound start publication_date #> 1 307076 0 2013-12-31T00:00:00Z #> 2 307076 0 2013-12-31T00:00:00Z #> 3 307076 0 2013-12-31T00:00:00Z ``` Search group with format simple ```r cli$group(params = list(q='*:*', group.field='journal', rows=5, group.limit=3, group.sort='publication_date desc', group.format='simple', fl='journal, publication_date')) #> numFound start publication_date journal #> 1 1898495 0 2012-10-23T00:00:00Z #> 2 1898495 0 2012-10-23T00:00:00Z #> 3 1898495 0 2012-10-23T00:00:00Z #> 4 1898495 0 2017-11-01T00:00:00Z PLOS ONE #> 5 1898495 0 2017-11-01T00:00:00Z PLOS ONE ``` ### Facet ```r cli$facet(params = list(q='*:*', facet.field='journal', facet.query=c('cell', 'bird'))) #> $facet_queries #> # A tibble: 2 x 2 #> term value #> #> 1 cell 157652 #> 2 bird 16385 #> #> $facet_fields #> $facet_fields$journal #> # A tibble: 9 x 2 #> term value #> #> 1 plos one 1572163 #> 2 plos genetics 59871 #> 3 plos pathogens 53246 #> 4 plos neglected tropical diseases 47510 #> 5 plos computational biology 45582 #> 6 plos biology 32513 #> 7 plos medicine 23519 #> 8 plos clinical trials 521 #> 9 plos medicin 9 #> #> #> $facet_pivot #> NULL #> #> $facet_dates #> NULL #> #> $facet_ranges #> NULL ``` ### Highlight ```r cli$highlight(params = list(q='alcohol', hl.fl = 'abstract', rows=2)) #> # A tibble: 2 x 2 #> names #> #> 1 10.1371/journal.pone.0185457 #> 2 10.1371/journal.pone.0071284 #> # ... with 1 more variables: abstract ``` ### Stats ```r out <- cli$stats(params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet='journal')) ``` ```r out$data #> min max count missing sum sumOfSquares #> counter_total_all 0 920716 40497 0 219020039 7.604567e+12 #> alm_twitterCount 0 3401 40497 0 281128 7.300081e+07 #> mean stddev #> counter_total_all 5408.302813 12591.07462 #> alm_twitterCount 6.941946 41.88646 ``` ### More like this `solr_mlt` is a function to return similar documents to the one ```r out <- cli$mlt(params = list(q='title:"ecology" AND body:"cell"', mlt.fl='title', mlt.mindf=1, mlt.mintf=1, fl='counter_total_all', rows=5)) ``` ```r out$docs #> # A tibble: 5 x 2 #> id counter_total_all #> #> 1 10.1371/journal.pbio.1001805 21824 #> 2 10.1371/journal.pbio.0020440 25424 #> 3 10.1371/journal.pbio.1002559 9746 #> 4 10.1371/journal.pone.0087217 11502 #> 5 10.1371/journal.pbio.1002191 22013 ``` ```r out$mlt #> $`10.1371/journal.pbio.1001805` #> # A tibble: 5 x 4 #> numFound start id counter_total_all #> #> 1 3822 0 10.1371/journal.pone.0098876 3590 #> 2 3822 0 10.1371/journal.pone.0082578 2893 #> 3 3822 0 10.1371/journal.pone.0102159 2028 #> 4 3822 0 10.1371/journal.pcbi.1002652 3819 #> 5 3822 0 10.1371/journal.pcbi.1003408 9920 #> #> $`10.1371/journal.pbio.0020440` #> # A tibble: 5 x 4 #> numFound start id counter_total_all #> #> 1 1115 0 10.1371/journal.pone.0162651 2828 #> 2 1115 0 10.1371/journal.pone.0003259 3225 #> 3 1115 0 10.1371/journal.pntd.0003377 4267 #> 4 1115 0 10.1371/journal.pone.0101568 4603 #> 5 1115 0 10.1371/journal.pone.0068814 9042 #> #> $`10.1371/journal.pbio.1002559` #> # A tibble: 5 x 4 #> numFound start id counter_total_all #> #> 1 5482 0 10.1371/journal.pone.0155989 2519 #> 2 5482 0 10.1371/journal.pone.0023086 8442 #> 3 5482 0 10.1371/journal.pone.0155028 1547 #> 4 5482 0 10.1371/journal.pone.0041684 22057 #> 5 5482 0 
10.1371/journal.pone.0164330 969 #> #> $`10.1371/journal.pone.0087217` #> # A tibble: 5 x 4 #> numFound start id counter_total_all #> #> 1 4576 0 10.1371/journal.pone.0175497 1088 #> 2 4576 0 10.1371/journal.pone.0159131 4937 #> 3 4576 0 10.1371/journal.pcbi.0020092 24786 #> 4 4576 0 10.1371/journal.pone.0133941 1336 #> 5 4576 0 10.1371/journal.pone.0131665 1207 #> #> $`10.1371/journal.pbio.1002191` #> # A tibble: 5 x 4 #> numFound start id counter_total_all #> #> 1 12585 0 10.1371/journal.pbio.1002232 3055 #> 2 12585 0 10.1371/journal.pone.0070448 2203 #> 3 12585 0 10.1371/journal.pone.0131700 2493 #> 4 12585 0 10.1371/journal.pone.0121680 4980 #> 5 12585 0 10.1371/journal.pone.0041534 5701 ``` ### Parsing `solr_parse` is a general purpose parser function with extension methods `solr_parse.sr_search`, `solr_parse.sr_facet`, and `solr_parse.sr_high`, for parsing `solr_search`, `solr_facet`, and `solr_highlight` function output, respectively. `solr_parse` is used internally within those three functions (`solr_search`, `solr_facet`, `solr_highlight`) to do parsing. You can optionally get back raw `json` or `xml` from `solr_search`, `solr_facet`, and `solr_highlight` setting parameter `raw=TRUE`, and then parsing after the fact with `solr_parse`. All you need to know is `solr_parse` can parse For example: ```r (out <- cli$highlight(params = list(q='alcohol', hl.fl = 'abstract', rows=2), raw=TRUE)) #> [1] "{\"response\":{\"numFound\":25987,\"start\":0,\"maxScore\":4.705177,\"docs\":[{\"id\":\"10.1371/journal.pone.0185457\",\"journal\":\"PLOS ONE\",\"eissn\":\"1932-6203\",\"publication_date\":\"2017-09-28T00:00:00Z\",\"article_type\":\"Research Article\",\"author_display\":[\"Jacqueline Willmore\",\"Terry-Lynne Marko\",\"Darcie Taing\",\"Hugues Sampasa-Kanyinga\"],\"abstract\":[\"Objectives: Alcohol-related morbidity and mortality are significant public health issues. The purpose of this study was to describe the prevalence and trends over time of alcohol consumption and alcohol-related morbidity and mortality; and public attitudes of alcohol use impacts on families and the community in Ottawa, Canada. Methods: Prevalence (2013–2014) and trends (2000–2001 to 2013–2014) of alcohol use were obtained from the Canadian Community Health Survey. Data on paramedic responses (2015), emergency department (ED) visits (2013–2015), hospitalizations (2013–2015) and deaths (2007–2011) were used to quantify the acute and chronic health effects of alcohol in Ottawa. Qualitative data were obtained from the “Have Your Say†alcohol survey, an online survey of public attitudes on alcohol conducted in 2016. Results: In 2013–2014, an estimated 595,300 (83%) Ottawa adults 19 years and older drank alcohol, 42% reported binge drinking in the past year. Heavy drinking increased from 15% in 2000–2001 to 20% in 2013–2014. In 2015, the Ottawa Paramedic Service responded to 2,060 calls directly attributable to alcohol. Between 2013 and 2015, there were an average of 6,100 ED visits and 1,270 hospitalizations per year due to alcohol. Annually, alcohol use results in at least 140 deaths in Ottawa. Men have higher rates of alcohol-attributable paramedic responses, ED visits, hospitalizations and deaths than women, and young adults have higher rates of alcohol-attributable paramedic responses. Qualitative data of public attitudes indicate that alcohol misuse has greater repercussions not only on those who drink, but also on the family and community. 
Conclusions: Results highlight the need for healthy public policy intended to encourage a culture of drinking in moderation in Ottawa to support lower risk alcohol use, particularly among men and young adults. \"],\"title_display\":\"The burden of alcohol-related morbidity and mortality in Ottawa, Canada\",\"score\":4.705177},{\"id\":\"10.1371/journal.pone.0071284\",\"journal\":\"PLoS ONE\",\"eissn\":\"1932-6203\",\"publication_date\":\"2013-08-20T00:00:00Z\",\"article_type\":\"Research Article\",\"author_display\":[\"Petra Suchankova\",\"Pia Steensland\",\"Ida Fredriksson\",\"Jörgen A. Engel\",\"Elisabet Jerlhag\"],\"abstract\":[\"\\nAlcohol dependence is a heterogeneous disorder where several signalling systems play important roles. Recent studies implicate that the gut-brain hormone ghrelin, an orexigenic peptide, is a potential mediator of alcohol related behaviours. Ghrelin increases whereas a ghrelin receptor (GHS-R1A) antagonist decreases alcohol consumption as well as operant self-administration of alcohol in rodents that have consumed alcohol for twelve weeks. In the present study we aimed at investigating the effect of acute and repeated treatment with the GHS-R1A antagonist JMV2959 on alcohol intake in a group of rats following voluntarily alcohol consumption for two, five and eight months. After approximately ten months of voluntary alcohol consumption the expression of the GHS-R1A gene (Ghsr) as well as the degree of methylation of a CpG island found in Ghsr was examined in reward related brain areas. In a separate group of rats, we examined the effect of the JMV2959 on alcohol relapse using the alcohol deprivation paradigm. Acute JMV2959 treatment was found to decrease alcohol intake and the effect was more pronounced after five, compared to two months of alcohol exposure. In addition, repeated JMV2959 treatment decreased alcohol intake without inducing tolerance or rebound increase in alcohol intake after the treatment. The GHS-R1A antagonist prevented the alcohol deprivation effect in rats. There was a significant down-regulation of the Ghsr expression in the ventral tegmental area (VTA) in high- compared to low-alcohol consuming rats after approximately ten months of voluntary alcohol consumption. Further analysis revealed a negative correlation between Ghsr expression in the VTA and alcohol intake. No differences in methylation degree were found between high- compared to low-alcohol consuming rats. These findings support previous studies showing that the ghrelin signalling system may constitute a potential target for development of novel treatment strategies for alcohol dependence.\\n\"],\"title_display\":\"Ghrelin Receptor (GHS-R1A) Antagonism Suppresses Both Alcohol Consumption and the Alcohol Deprivation Effect in Rats following Long-Term Voluntary Alcohol Consumption\",\"score\":4.7050986}]},\"highlighting\":{\"10.1371/journal.pone.0185457\":{\"abstract\":[\"Objectives: Alcohol-related morbidity and mortality are significant public health issues\"]},\"10.1371/journal.pone.0071284\":{\"abstract\":[\"\\nAlcohol dependence is a heterogeneous disorder where several signalling systems play important\"]}}}\n" #> attr(,"class") #> [1] "sr_high" #> attr(,"wt") #> [1] "json" ``` Then parse ```r solr_parse(out, 'df') #> # A tibble: 2 x 2 #> names #> #> 1 10.1371/journal.pone.0185457 #> 2 10.1371/journal.pone.0071284 #> # ... 
 with 1 more variables: abstract ``` ### Advanced: Function Queries Function Queries allow you to query on actual numeric fields in the Solr database, and do addition, multiplication, etc. on one or many fields to sort results. For example, here, we search on the product of counter_total_all and alm_twitterCount, using a new temporary field "_val_" ```r cli$search(params = list(q='_val_:"product(counter_total_all,alm_twitterCount)"', rows=5, fl='id,title', fq='doc_type:full')) #> # A tibble: 5 x 2 #> id #> #> 1 10.1371/journal.pmed.0020124 #> 2 10.1371/journal.pone.0141854 #> 3 10.1371/journal.pone.0073791 #> 4 10.1371/journal.pone.0153419 #> 5 10.1371/journal.pone.0115069 #> # ... with 1 more variables: title ``` Here, we search for the papers with the most citations ```r cli$search(params = list(q='_val_:"max(counter_total_all)"', rows=5, fl='id,counter_total_all', fq='doc_type:full')) #> # A tibble: 5 x 2 #> id #> #> 1 10.1371/journal.pmed.0020124 #> 2 10.1371/annotation/80bd7285-9d2d-403a-8e6f-9c375bf977ca #> 3 10.1371/journal.pcbi.1003149 #> 4 10.1371/journal.pone.0141854 #> 5 10.1371/journal.pcbi.0030102 #> # ... with 1 more variables: counter_total_all ``` Or with the most tweets ```r cli$search(params = list(q='_val_:"max(alm_twitterCount)"', rows=5, fl='id,alm_twitterCount', fq='doc_type:full')) #> # A tibble: 5 x 2 #> id alm_twitterCount #> #> 1 10.1371/journal.pone.0141854 3401 #> 2 10.1371/journal.pmed.0020124 3207 #> 3 10.1371/journal.pone.0115069 2873 #> 4 10.1371/journal.pmed.1001953 2821 #> 5 10.1371/journal.pone.0061981 2392 ``` ### Using specific data sources __USGS BISON service__ The occurrences service ```r conn <- SolrClient$new(scheme = "https", host = "bison.usgs.gov", path = "solr/occurrences/select", port = NULL) conn$search(params = list(q = '*:*', fl = c('decimalLatitude','decimalLongitude','scientificName'), rows = 2)) #> # A tibble: 2 x 3 #> decimalLongitude scientificName decimalLatitude #> #> 1 -116.5694 Zonotrichia leucophrys 34.05072 #> 2 -116.5694 Tyrannus vociferans 34.05072 ``` The species names service ```r conn <- SolrClient$new(scheme = "https", host = "bison.usgs.gov", path = "solr/scientificName/select", port = NULL) conn$search(params = list(q = '*:*')) #> # A tibble: 10 x 2 #> scientificName `_version_` #> #> 1 Dictyopteris polypodioides 1.565325e+18 #> 2 Lonicera iberica 1.565325e+18 #> 3 Epuraea ambigua 1.565325e+18 #> 4 Pseudopomala brachyptera 1.565325e+18 #> 5 Didymosphaeria populina 1.565325e+18 #> 6 Sanoarca 1.565325e+18 #> 7 Celleporina ventricosa 1.565325e+18 #> 8 Trigonurus crotchi 1.565325e+18 #> 9 Ceraticelus laticeps 1.565325e+18 #> 10 Micraster acutus 1.565325e+18 ``` __PLOS Search API__ Most of the examples above use the PLOS search API... :) ## Solr server management This isn't as complete as the searching functions shown above, but we're getting there. ### Cores ```r conn <- SolrClient$new() ``` Many functions, e.g.: * `core_create()` * `core_rename()` * `core_status()` * ... Create a core ```r conn$core_create(name = "foo_bar") ``` ### Collections Many functions, e.g.: * `collection_create()` * `collection_list()` * `collection_addrole()` * ... 
Create a collection ```r conn$collection_create(name = "hello_world") ``` ### Add documents Add documents, supports adding from files (json, xml, or csv format), and from R objects (including `data.frame` and `list` types so far) ```r df <- data.frame(id = c(67, 68), price = c(1000, 500000000)) conn$add(df, name = "books") ``` Delete documents, by id ```r conn$delete_by_id(name = "books", ids = c(3, 4)) ``` Or by query ```r conn$delete_by_query(name = "books", query = "manu:bank") ``` ## Meta * Please [report any issues or bugs](https://github.com/ropensci/solrium/issues) * License: MIT * Get citation information for `solrium` in R doing `citation(package = 'solrium')` * Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. [![ropensci_footer](https://ropensci.org/public_images/github_footer.png)](https://ropensci.org) solrium/MD50000644000176200001440000002531513176564220012270 0ustar liggesusersa9f6b254fbb76129a4e190c097a5a991 *DESCRIPTION c5af52351472a750055a760a8924ce71 *LICENSE 6147f3bce15f560ba361d3631e113af7 *NAMESPACE 015fccc23f84fa8879f8480784c10bda *NEWS.md 519f0da8c1e307e8c3ce6efcdecf8ffe *R/SolrClient.R 51a24bb84d580db5ecd67f434299f211 *R/add.R 74a70333074fe4456d00b6e2277e530f *R/check_args_helpers.R ed9b1328a6361812eb7f44a8dd71928e *R/classes.r 303d3f009cfbc6bec848fc26d0c215fe *R/collection_addreplica.R 6bcf0d1dc0812110277f4a06eaa0cdde *R/collection_addreplicaprop.R 2d0fe3418cfc877548477a2b2bef4720 *R/collection_addrole.R b6343a9ae1d4f89d985fd260b599e3d6 *R/collection_balanceshardunique.R e511c30d0dd791b3b15d7b19919c3e7c *R/collection_clusterprop.R 2a9603a7140d94e015bf88ba70a9b151 *R/collection_clusterstatus.R 243f203538dbc33f22ae6169671dc904 *R/collection_create.R 979f5bf6cec0c3ff17a37eded0ebd047 *R/collection_createalias.R 5c79097e5351a62d7b7703f5f7c5ae5f *R/collection_createshard.R c0661830891b17d6f896b6d08c563f40 *R/collection_delete.R c8232f50626418fc2c54d62ee8dd39a9 *R/collection_deletealias.R 096f1e33dec260db6c2ea453dc2efd45 *R/collection_deletereplica.R 09f099f4eedac0be1cce30b553892395 *R/collection_deletereplicaprop.R b178e05dadfd441b1eadb85046859521 *R/collection_deleteshard.R 4b77d9b58bc95bb9e7b18a35ce882813 *R/collection_exists.R 2677b582bca6e52a1028ab9afea12438 *R/collection_list.R 3cf13e8e338666a59ba7b45b04a8fa07 *R/collection_migrate.R f7835251d65615664739fbff93fddf23 *R/collection_overseerstatus.R 5f575502710a6b6383c825bd4d6b65b0 *R/collection_rebalanceleaders.R 3e57d061f34f82852586631791c362d0 *R/collection_reload.R c52ae0ed1994b261c8f28cb8fd48d087 *R/collection_removerole.R 1a13ae2d8f65d06d3e8440c89e3af3ec *R/collection_requeststatus.R beb1516c83908cad52e3fc8ef9c75d59 *R/collection_splitshard.R 3e14e4ae28443f1e09ffd3030307f826 *R/collections.R 4de7d8c434f27a0b386ed81d84cf91a5 *R/commit.R d3539d5bc7e120c2ca38429082bd1d23 *R/config_get.R 8ce85ae2d4374337226e1e761fb847de *R/config_overlay.R 6f8defad7fa86ee7f0b8e7e65b33c549 *R/config_params.R 66353ed97c77a7cfce5974f6a6408044 *R/config_set.R aa0c2598a5077f0e6c233282cd18db9d *R/content_types.R 03b47bf590ffa32ad0a56aba5d1726aa *R/core_create.R bd94926a81c559b1d23701363fe9eeac *R/core_exists.R 0386f1af9bbe2d29c8d0520fa79d1b8c *R/core_mergeindexes.R 10e1de7c866c7a71b2989d30674b250d *R/core_reload.R b07588a1e7d854685e1b2d7324bd8e4e *R/core_rename.R 8e56bd3bf9cc8a0948ce1d6e4ac76216 *R/core_requeststatus.R 1112fc94a77c16fc969a061f8f2c40c1 *R/core_split.R 70814361469f34b89ac64ad91b9a6f14 *R/core_status.R 
d1f3339df4fda9c0e48bb7a14479fcf0 *R/core_swap.R 9a92ba303c8719f45d809782f9cc5aa0 *R/core_unload.R c2dfedc7bf6c4336c9a06a62b1e09489 *R/delete.R c497e20c95c55bf83ad3ba7d46c40bbf *R/optimize.R e7f9105b78fe7090c809ea14cbdb51de *R/parsers.R 8537d997ad200c9001ccb78fd244ef87 *R/ping.R 268ae9899726f7c5bd38f93f45c3548b *R/schema.R 33f009223c3e99005c87d48342d13215 *R/search_route_keys.R 32b7ca10eddf65ac52d3126382221ccb *R/solr_all.r ca0e43329720ff0c0efa2d84446ccc28 *R/solr_facet.r c291615b5d5e1f1fde68360cdd253005 *R/solr_get.R 9a24b9176a7fd56dec21c8dfee4b689a *R/solr_group.r acde743d3d4dd4dc428a80b0d7f37bdb *R/solr_highlight.r db959de704f4028b485d8cf6396d2b9f *R/solr_mlt.r e6c39fd5a5d35731292a8b7a055ccc9c *R/solr_search.r 10b08a15c4f6625be4e113e66b2d960f *R/solr_stats.r 0dc4cc285b6f4c55daa434d0a6ea5eb9 *R/solrium-package.R 5172210f95313185eaec7372a06b0d8e *R/update_atomic_json.R 324dc3a80d76b017c1b9e5ab4e3c6b89 *R/update_atomic_xml.R c4f5c519254c3f369a5b0bd93d868250 *R/update_csv.R ae1fe1b092aec27b166e6ae79c8ab891 *R/update_json.R ecee6b8324315ee18654c01e2ef62093 *R/update_xml.R fef37f4cf43ff7e75715f5a445462cb3 *R/zzz.r f4318dab14f7d2342d9337123472f6be *README.md 73a20101fec67c6f936ae8246947946a *build/vignette.rds ae1097c4c79e8dfbf4f5982af5a2bb3f *inst/doc/cores_collections.Rmd 0f33cd79c266c0543a4a8ec6dca17c91 *inst/doc/cores_collections.html 24a71da1896d16ecbd9fc4d7476c91d3 *inst/doc/document_management.Rmd b077e3a569d0726ca65946c5513a000b *inst/doc/document_management.html 8b3228a467423b9a150c7e9a99bbfeef *inst/doc/local_setup.Rmd d43725530d4414406cf2bdabadd53cf3 *inst/doc/local_setup.html f4bc6338aebf8ed9212b6f8c5122a1d1 *inst/doc/search.Rmd 253ad63f64961638a8d40bfdca2c143b *inst/doc/search.html cd1cc006debed6e2b4556001fb61b872 *inst/examples/add_delete.json ab2c69200e3d331b34d8b8d9158feab4 *inst/examples/add_delete.xml 8dc63db5d80bc7f931c6a439080c3bbc *inst/examples/books.csv b2c72a069b9c55a21c7e4a512cb52c32 *inst/examples/books.json d19723170a92193a92840af3cfbb0c15 *inst/examples/books.xml ec0e387d7aee2c2f391d31882cc75eed *inst/examples/books2.json d56e9cd01b8e1a6c61cfcc077219cffa *inst/examples/books2_delete.json f437720237985879e5f2347694aac166 *inst/examples/books2_delete.xml c79fd4b2cbf3d6b44752c71a944335b0 *inst/examples/books_delete.json 0cbb22664329aa7d8e125bff214f1a35 *inst/examples/books_delete.xml 1c8662000097080ed54d2d9cdc4313c2 *inst/examples/schema.xml 7344fdb8f567b318829a54322bafc504 *inst/examples/solrconfig.xml f8225c6c4a245757188e297d7259f5bb *inst/examples/updatecommands_add.json 1d42c66dcbc92c2f8ac0c2a3effabcca *inst/examples/updatecommands_add.xml 5eab27b9c1f8c6f873c7bb16dd7d24a7 *inst/examples/updatecommands_delete.json d268b60d7387fb5dc7c7b640de3e1ea1 *inst/examples/updatecommands_delete.xml d9ca6e1c6a421c09252e389aef1f0535 *man/SolrClient.Rd 363eaa8d0f45441bb35efda979b2dbe9 *man/add.Rd be82eb40abcd983510ee1d2af38d1610 *man/collapse_pivot_names.Rd 78294e9b187a0441566f1ab312bbfdad *man/collection_addreplica.Rd 707717c713d372a201b0c91a373ab615 *man/collection_addreplicaprop.Rd 5ff2387d649bf3996c5a90ff5a759d5d *man/collection_addrole.Rd be28882bc27c489319bb7fec1f0d4d05 *man/collection_balanceshardunique.Rd 9edcc747d3eb8b6da0fa81d813d0e938 *man/collection_clusterprop.Rd 68b07f2f80979d08c9b57a5669965129 *man/collection_clusterstatus.Rd 5b627557db1fc02904d367a1fbd089a5 *man/collection_create.Rd a8d93f334bcdae775162652e5e394fc3 *man/collection_createalias.Rd ca482055af0977eed371bb907304f182 *man/collection_createshard.Rd 5bdea47a19db7a87fd723aa1d85b7444 *man/collection_delete.Rd 
59c5613db0979f87a1362d12b1869aea *man/collection_deletealias.Rd 1e06bb6219379d1d17cd02145827b821 *man/collection_deletereplica.Rd 206c7330cb11f725904e287711e18f97 *man/collection_deletereplicaprop.Rd 52e611201b8c50f9b5fcb1e280161cf2 *man/collection_deleteshard.Rd 3aa8e3423d20c94f977de83fa2783bd4 *man/collection_exists.Rd 243a59739867bee00821facf00b8d7eb *man/collection_list.Rd c83caf0e99c1f8798f93ae1905c39b00 *man/collection_migrate.Rd 451eb3efacde1b7c480b3c4766a19ba9 *man/collection_overseerstatus.Rd 17f3e2f8407c5d6dbf7319d44f9b26a8 *man/collection_rebalanceleaders.Rd 22abf6ae18bfe3823f3d8d118920acfd *man/collection_reload.Rd 68a113d06d718e7b3ebcf3e9039cf7fb *man/collection_removerole.Rd 9698967ae7240248c6907411b1a9dbeb *man/collection_requeststatus.Rd eeb299bae2d8218722f6af36ed000475 *man/collection_splitshard.Rd d355868cc83860adb6a13b2e65f95a41 *man/collections.Rd 1316c5571e56c3d0db766a7d3b659e4c *man/commit.Rd ec9a08982d63890ec0c5af832ce92c67 *man/config_get.Rd 58f715f83073846d4a3a915ca045e818 *man/config_overlay.Rd 20cf3e585698725426680eaaa0436dc4 *man/config_params.Rd caf8a6f5497b8eaf7f2b0fd64d9d974e *man/config_set.Rd d9ca78fb61bb8f516d03a59bda170b79 *man/core_create.Rd 18114e49836db916e449e5998f2624d6 *man/core_exists.Rd 4418747a026107b89471bca47b3ec16e *man/core_mergeindexes.Rd 270af43560eb3a503971e1107fb6a3b4 *man/core_reload.Rd cbd213de89e022e12499415eed338e3e *man/core_rename.Rd 3fbc439715559065ad0cb0bfca97a907 *man/core_requeststatus.Rd 6515de785c27318696cb043f6b9069e9 *man/core_split.Rd aad7bd951f4314b143a6301c0fd9dc6f *man/core_status.Rd d40278a4fb3217e8bfd5188e9aed54e1 *man/core_swap.Rd 7a7e98ae6bb64723b14eb2a281e18484 *man/core_unload.Rd 4915619f623906447223fa0c7609ee53 *man/delete.Rd c500e495fc2936de7888060c883b1fe5 *man/is-sr.Rd 773e7575d5753efe8175c2f82ea32b13 *man/makemultiargs.Rd 2360da5240906b0ffdebc5d9f051b14b *man/ping.Rd 7add5ac5e331e622f8f75e91a69cb416 *man/pivot_flatten_tabular.Rd dc1bf83683ddc1a5f70c21071e307fd9 *man/schema.Rd 89e3bc155893ab4ba62673eff88b04b1 *man/solr_all.Rd f275e18b0bb6f47fd263de673cdb194e *man/solr_facet.Rd 924b0fa8bd1fdb54238929a2698aceae *man/solr_get.Rd fc8069708d471d59f00450f50070d5cc *man/solr_group.Rd a2b7b2612581e4fb86d4282be276935d *man/solr_highlight.Rd ba53d988a5466a7b6f73578347f2901a *man/solr_mlt.Rd 4945c47c1995f5ae0fb5c5991871f2f1 *man/solr_optimize.Rd 078a05655fa73abafca65fb0a53a0b3b *man/solr_parse.Rd c68066d685e4faf639ea85d5ca469607 *man/solr_search.Rd 0ab2d4dbf743b24a4ed77011dc3d1c13 *man/solr_stats.Rd 9ab36ee2cc21b7583ccafac30b170337 *man/solrium-package.Rd d734b617f91fd6750b7a7ac70163054e *man/update_atomic_json.Rd c4234a41a385a4a60bc34263ea6ea1f7 *man/update_atomic_xml.Rd 00e78ae90e489d35652ed16ebc381894 *man/update_csv.Rd b4e30bf1582f82c9657a05ad5dd12338 *man/update_json.Rd 398845a8c0ad2a69737b262b1e7c3481 *man/update_xml.Rd b4487f117183b6157cba9b75de6d078a *tests/cloud_mode/test-add.R a72186f5ba6d9b13fe5219c2e8024c2e *tests/cloud_mode/test-collections.R 1baaceeffe758af5c1b0b01e073927e2 *tests/standard_mode/test-core_create.R d4549d7babf9d1437a58916e7778aafb *tests/test-all.R 7db67b4e4446b2e7eaf8c91cd6c6c8d3 *tests/testthat/helper-solrium.R 76ad8e374fa9d7ca38c84193602293ba *tests/testthat/test-add.R 8b65e2f8269bddd921c5079cdef0b96a *tests/testthat/test-client.R 4f634305850503776164a898d7f58719 *tests/testthat/test-collections.R 9efa8981e4737d2fdd02dba6320ce4a8 *tests/testthat/test-core_create.R b0c377a9f0842f20a1054a1b9f103a62 *tests/testthat/test-delete.R 5d50d567cc62e141475db642fc9b37ec *tests/testthat/test-errors.R 
04976b640a1ca2b204b34f499be9693a *tests/testthat/test-ping.R c042f55fe69bf8cd9cb6e956df89c140 *tests/testthat/test-schema.R 1054ffe8bd0314e3f227b5eb6bc162c2 *tests/testthat/test-solr_all.R b24c6a523123d571d3a24f9d3c198934 *tests/testthat/test-solr_error.R 4ce0d10effac270c39ea51a432fd72bf *tests/testthat/test-solr_facet.r 48997aafacc6dddcf4427c0e87c875fe *tests/testthat/test-solr_get.R 704b1d7e0ad5daf0a484749e55b336d9 *tests/testthat/test-solr_goup.R 97781e5df6dfdc278b7a2e1a43649d07 *tests/testthat/test-solr_highlight.r 4c8b786a1690b4620549fc18cb4eed10 *tests/testthat/test-solr_mlt.r 341f4deb3ead0ca9f504c6372050ac24 *tests/testthat/test-solr_search.r 2a53b123f557b35b7f3f5ef89140285b *tests/testthat/test-solr_stats.r ccd3487af7a54204ddf8275587cd8f8d *tests/testthat/test-update_atomic_json.R a273ac80232ad13018015d48922c27a0 *tests/testthat/test-update_atomic_xml.R cd5267e708a49578de64d0c589ed14eb *tests/testthat/test-update_csv.R e761a4148542b740dad408411d5e66ae *tests/testthat/test-update_json.R 89f45dd274d1929b4b830ec84b6c320d *tests/testthat/test-update_xml.R ae1097c4c79e8dfbf4f5982af5a2bb3f *vignettes/cores_collections.Rmd 24a71da1896d16ecbd9fc4d7476c91d3 *vignettes/document_management.Rmd 8b3228a467423b9a150c7e9a99bbfeef *vignettes/local_setup.Rmd f4bc6338aebf8ed9212b6f8c5122a1d1 *vignettes/search.Rmd solrium/build/0000755000176200001440000000000013176475640013060 5ustar liggesuserssolrium/build/vignette.rds0000644000176200001440000000044413176475640015421 0ustar liggesusers‹’ÏNÃ0 Ƴ¶ Ö QiâÏCŸ <Õ.p@ƒ·*J «”&S’iâÆƒ#Š[\H7†¦*±ý}n~±Úç˜1°0 XbÎpãJpEøL0ž mÀBK ÂUZÙlQ—d^–Z¬kP®¨¹â¯Ð¦ž}&µà²°àÖ+OŽ-p#–2„]ß¶°–þL=³œ˜»VrßòÒG-MÚAIŸ’ÒR·ˆ»ã-]-ɽúk>ÏOü=}JvÒÈöF­«¾ûÙ?ñÀ>ü®Œèü#Åk°dŽIŒæ•„¾á©r?EøÏ)-(9Îaª´TžÜÁÛF¬‡ ‰Ñ›¬‡¶Ø;nMÓ|lßHHnûõb\rdzƒïcõùÕ¶4Ì£solrium/DESCRIPTION0000644000176200001440000000211313176564220013455 0ustar liggesusersPackage: solrium Title: General Purpose R Interface to 'Solr' Description: Provides a set of functions for querying and parsing data from 'Solr' () 'endpoints' (local and remote), including search, 'faceting', 'highlighting', 'stats', and 'more like this'. In addition, some functionality is included for creating, deleting, and updating documents in a 'Solr' 'database'. Version: 1.0.0 Authors@R: person("Scott", "Chamberlain", role = c("aut", "cre"), email = "myrmecocystus@gmail.com") License: MIT + file LICENSE URL: https://github.com/ropensci/solrium BugReports: https://github.com/ropensci/solrium/issues VignetteBuilder: knitr Imports: utils, dplyr (>= 0.5.0), plyr (>= 1.8.4), crul (>= 0.4.0), xml2 (>= 1.0.0), jsonlite (>= 1.0), tibble (>= 1.2), R6 Suggests: roxygen2 (>= 6.0.1), testthat, knitr RoxygenNote: 6.0.1 NeedsCompilation: no Packaged: 2017-11-02 01:57:53 UTC; sacmac Author: Scott Chamberlain [aut, cre] Maintainer: Scott Chamberlain Repository: CRAN Date/Publication: 2017-11-02 09:42:40 UTC solrium/man/0000755000176200001440000000000013167521443012525 5ustar liggesuserssolrium/man/collection_deletealias.Rd0000644000176200001440000000175613176213437017515 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_deletealias.R \name{collection_deletealias} \alias{collection_deletealias} \title{Delete a collection alias} \usage{ collection_deletealias(conn, alias, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{alias}{(character) Required. 
The alias name to be deleted} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Delete a collection alias } \examples{ \dontrun{ (conn <- SolrClient$new()) if (!conn$collection_exists("thingsstuff")) { conn$collection_create(name = "thingsstuff") } conn$collection_createalias("tstuff", "thingsstuff") conn$collection_clusterstatus()$cluster$collections$thingsstuff$aliases # new alias conn$collection_deletealias("tstuff") conn$collection_clusterstatus()$cluster$collections$thingsstuff$aliases # gone } } solrium/man/commit.Rd0000644000176200001440000000271213176256017014310 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/commit.R \name{commit} \alias{commit} \title{Commit} \usage{ commit(conn, name, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) A collection or core name. Required.} \item{expunge_deletes}{merge segments with deletes away. Default: \code{FALSE}} \item{wait_searcher}{block until a new searcher is opened and registered as the main query searcher, making the changes visible. Default: \code{TRUE}} \item{soft_commit}{perform a soft commit - this will refresh the 'view' of the index in a more performant manner, but without "on-disk" guarantees. Default: \code{FALSE}} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}} to parse. If xml, uses \code{\link[xml2:read_xml]{xml2::read_xml()}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Commit } \examples{ \dontrun{ (conn <- SolrClient$new()) conn$commit("gettingstarted") conn$commit("gettingstarted", wait_searcher = FALSE) # get xml back conn$commit("gettingstarted", wt = "xml") ## raw xml conn$commit("gettingstarted", wt = "xml", raw = TRUE) } } \references{ <> } solrium/man/core_create.Rd0000644000176200001440000000475113167507346015274 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_create.R \name{core_create} \alias{core_create} \title{Create a core} \usage{ core_create(conn, name, instanceDir = NULL, config = NULL, schema = NULL, dataDir = NULL, configSet = NULL, collection = NULL, shard = NULL, async = NULL, raw = FALSE, callopts = list(), ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{instanceDir}{(character) Path to instance directory} \item{config}{(character) Path to config file} \item{schema}{(character) Path to schema file} \item{dataDir}{(character) Name of the data directory relative to instanceDir.} \item{configSet}{(character) Name of the configset to use for this core. For more information, see https://lucene.apache.org/solr/guide/6_6/config-sets.html} \item{collection}{(character) The name of the collection to which this core belongs. The default is the name of the core. collection.<param>=<value> causes a property of <param>=<value> to be set if a new collection is being created. Use collection.configName=<configname> to point to the configuration for a new collection.} \item{shard}{(character) The shard id this core represents.
Normally you want to be auto-assigned a shard id.} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. (https://lucene.apache.org/solr/guide/6_6/defining-core-properties.html)} } \description{ Create a core } \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # you can create a new core like: bin/solr create -c corename # where corename is the name for your core - or create as below # connect (conn <- SolrClient$new()) # Create a core path <- "~/solr-7.0.0/server/solr/newcore/conf" dir.create(path, recursive = TRUE) files <- list.files("~/solr-7.0.0/server/solr/configsets/sample_techproducts_configs/conf/", full.names = TRUE) invisible(file.copy(files, path, recursive = TRUE)) conn$core_create(name = "newcore", instanceDir = "newcore", configSet = "sample_techproducts_configs") } } solrium/man/SolrClient.Rd0000644000176200001440000002143513176437707015100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/SolrClient.R \docType{data} \name{SolrClient} \alias{SolrClient} \title{Solr connection client} \arguments{ \item{host}{(character) Host url. Default: 127.0.0.1} \item{path}{(character) url path.} \item{port}{(character/numeric) Port. Default: 8983} \item{scheme}{(character) http scheme, one of http or https. Default: http} \item{proxy}{List of arguments for a proxy connection, including one or more of: url, port, username, password, and auth. See \link[crul:proxy]{crul::proxy} for help, which is used to construct the proxy connection.} \item{errors}{(character) One of \code{"simple"} or \code{"complete"}. Simple gives http code and error message on an error, while complete gives both http code and error message, and stack trace, if available.} } \value{ Various output, see help files for each grouping of methods. } \description{ Solr connection client } \details{ \code{SolrClient} creates an R6 class object. The object is not cloneable and is portable, so it can be inherited across packages without complication. \code{SolrClient} is used to initialize a client that knows about your Solr instance, with options for setting host, port, http scheme, and simple vs. complete error reporting. } \section{SolrClient methods}{ Each of these methods also has a matching standalone exported function that you can use by passing in the connection object made by calling \code{SolrClient$new()}. Also, see the docs for each method for parameter definitions and their default values.
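For instance, the R6 method on the connection object and the matching standalone function are interchangeable; a minimal sketch, assuming a local Solr instance with a collection or core named "gettingstarted":
\preformatted{
conn <- SolrClient$new()
# call the R6 method on the connection object ...
conn$ping("gettingstarted")
# ... or pass the connection object to the standalone function
ping(conn, "gettingstarted")
}
The full set of methods is listed below.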
\itemize{ \item \code{ping(name, wt = 'json', raw = FALSE, ...)} \item \code{schema(name, what = '', raw = FALSE, ...)} \item \code{commit(name, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, wt = 'json', raw = FALSE, ...)} \item \code{optimize(name, max_segments = 1, wait_searcher = TRUE, soft_commit = FALSE, wt = 'json', raw = FALSE, ...)} \item \code{config_get(name, what = NULL, wt = "json", raw = FALSE, ...)} \item \code{config_params(name, param = NULL, set = NULL, unset = NULL, update = NULL, ...)} \item \code{config_overlay(name, omitHeader = FALSE, ...)} \item \code{config_set(name, set = NULL, unset = NULL, ...)} \item \code{collection_exists(name, ...)} \item \code{collection_list(raw = FALSE, ...)} \item \code{collection_create(name, numShards = 1, maxShardsPerNode = 1, createNodeSet = NULL, collection.configName = NULL, replicationFactor = 1, router.name = NULL, shards = NULL, createNodeSet.shuffle = TRUE, router.field = NULL, autoAddReplicas = FALSE, async = NULL, raw = FALSE, callopts=list(), ...)} \item \code{collection_addreplica(name, shard = NULL, route = NULL, node = NULL, instanceDir = NULL, dataDir = NULL, async = NULL, raw = FALSE, callopts=list(), ...)} \item \code{collection_addreplicaprop(name, shard, replica, property, property.value, shardUnique = FALSE, raw = FALSE, callopts=list())} \item \code{collection_addrole(role = "overseer", node, raw = FALSE, ...)} \item \code{collection_balanceshardunique(name, property, onlyactivenodes = TRUE, shardUnique = NULL, raw = FALSE, ...)} \item \code{collection_clusterprop(name, val, raw = FALSE, callopts=list())} \item \code{collection_clusterstatus(name = NULL, shard = NULL, raw = FALSE, ...)} \item \code{collection_createalias(alias, collections, raw = FALSE, ...)} \item \code{collection_createshard(name, shard, createNodeSet = NULL, raw = FALSE, ...)} \item \code{collection_delete(name, raw = FALSE, ...)} \item \code{collection_deletealias(alias, raw = FALSE, ...)} \item \code{collection_deletereplica(name, shard = NULL, replica = NULL, onlyIfDown = FALSE, raw = FALSE, callopts=list(), ...)} \item \code{collection_deletereplicaprop(name, shard, replica, property, raw = FALSE, callopts=list())} \item \code{collection_deleteshard(name, shard, raw = FALSE, ...)} \item \code{collection_migrate(name, target.collection, split.key, forward.timeout = NULL, async = NULL, raw = FALSE, ...)} \item \code{collection_overseerstatus(raw = FALSE, ...)} \item \code{collection_rebalanceleaders(name, maxAtOnce = NULL, maxWaitSeconds = NULL, raw = FALSE, ...)} \item \code{collection_reload(name, raw = FALSE, ...)} \item \code{collection_removerole(role = "overseer", node, raw = FALSE, ...)} \item \code{collection_requeststatus(requestid, raw = FALSE, ...)} \item \code{collection_splitshard(name, shard, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, ...)} \item \code{core_status(name = NULL, indexInfo = TRUE, raw = FALSE, callopts=list())} \item \code{core_exists(name, callopts = list())} \item \code{core_create(name, instanceDir = NULL, config = NULL, schema = NULL, dataDir = NULL, configSet = NULL, collection = NULL, shard = NULL, async=NULL, raw = FALSE, callopts=list(), ...)} \item \code{core_unload(name, deleteIndex = FALSE, deleteDataDir = FALSE, deleteInstanceDir = FALSE, async = NULL, raw = FALSE, callopts = list())} \item \code{core_rename(name, other, async = NULL, raw = FALSE, callopts=list())} \item \code{core_reload(name, raw = FALSE, callopts=list())} \item \code{core_swap(name, other, async = 
NULL, raw = FALSE, callopts=list())} \item \code{core_mergeindexes(name, indexDir = NULL, srcCore = NULL, async = NULL, raw = FALSE, callopts = list())} \item \code{core_requeststatus(requestid, raw = FALSE, callopts = list())} \item \code{core_split(name, path = NULL, targetCore = NULL, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, callopts=list())} \item \code{search(name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = 'df', concat = ',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...)} \item \code{facet(name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = 'df', concat = ',', ...)} \item \code{stats(name = NULL, params = list(q = '*:*', stats.field = NULL, stats.facet = NULL), body = NULL, callopts=list(), raw = FALSE, parsetype = 'df', ...)} \item \code{highlight(name = NULL, params = NULL, body = NULL, callopts=list(), raw = FALSE, parsetype = 'df', ...)} \item \code{group(name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', ...)} \item \code{mlt(name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...)} \item \code{all(name = NULL, params = NULL, body = NULL, callopts=list(), raw=FALSE, parsetype='df', concat=',', optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...)} \item \code{get(ids, name, fl = NULL, wt = 'json', raw = FALSE, ...)} \item \code{add(x, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...)} \item \code{delete_by_id(ids, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...)} \item \code{delete_by_query(query, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = 'json', raw = FALSE, ...)} \item \code{update_json(files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = 'json', raw = FALSE, ...)} \item \code{update_xml(files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = 'json', raw = FALSE, ...)} \item \code{update_csv(files, name, separator = ',', header = TRUE, fieldnames = NULL, skip = NULL, skipLines = 0, trim = FALSE, encapsulator = NULL, escape = NULL, keepEmpty = FALSE, literal = NULL, map = NULL, split = NULL, rowid = NULL, rowidOffset = NULL, overwrite = NULL, commit = NULL, wt = 'json', raw = FALSE, ...)} \item \code{update_atomic_json(body, name, wt = 'json', raw = FALSE, ...)} \item \code{update_atomic_xml(body, name, wt = 'json', raw = FALSE, ...)} } } \examples{ \dontrun{ # make a client (cli <- SolrClient$new()) # variables cli$host cli$port cli$path cli$scheme # ping ## ping to make sure it's up cli$ping("gettingstarted") # version ## get Solr version information cli$schema("gettingstarted") cli$schema("gettingstarted", "fields") cli$schema("gettingstarted", "name") cli$schema("gettingstarted", "version")$version # Search cli$search("gettingstarted", params = list(q = "*:*")) cli$search("gettingstarted", body = list(query = "*:*")) # set a different host SolrClient$new(host = 'stuff.com') # set a different port SolrClient$new(port = 3456) # set a different http scheme SolrClient$new(scheme = 'https') # set a proxy SolrClient$new(proxy = list(url = "187.62.207.130:3128")) prox <- list(url =
"187.62.207.130:3128", user = "foo", pwd = "bar") cli <- SolrClient$new(proxy = prox) cli$proxy # A remote Solr instance to which you don't have admin access (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) cli$search(params = list(q = "memory")) } } \keyword{datasets} solrium/man/collection_createshard.Rd0000644000176200001440000000254213167507346017525 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_createshard.R \name{collection_createshard} \alias{collection_createshard} \title{Create a shard} \usage{ collection_createshard(conn, name, shard, createNodeSet = NULL, raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{shard}{(character) Required. The name of the shard to be created.} \item{createNodeSet}{(character) Allows defining the nodes to spread the new collection across. If not provided, the CREATE operation will create shard-replica spread across all live Solr nodes. The format is a comma-separated list of node_names, such as localhost:8983_solr, localhost:8984_s olr, localhost:8985_solr.} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. (https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Create a shard } \examples{ \dontrun{ (conn <- SolrClient$new()) ## FIXME - doesn't work right now # conn$collection_create(name = "trees") # conn$collection_createshard(name = "trees", shard = "newshard") } } solrium/man/solr_get.Rd0000644000176200001440000000315413167521442014634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_get.R \name{solr_get} \alias{solr_get} \title{Real time get} \usage{ solr_get(conn, ids, name, fl = NULL, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{ids}{Document IDs, one or more in a vector or list} \item{name}{(character) A collection or core name. Required.} \item{fl}{Fields to return, can be a character vector like \code{c('id', 'title')}, or a single character vector with one or more comma separated names, like \code{'id,title'}} \item{wt}{(character) One of json (default) or xml. Data type returned. If json, uses \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}} to parse. If xml, uses \code{\link[xml2:read_xml]{xml2::read_xml()}} to parse.} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Get documents by id } \details{ We use json internally as data interchange format for this function. 
} \examples{ \dontrun{ (cli <- SolrClient$new()) # add some documents first ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) add(cli, ss, name = "gettingstarted") # Now, get documents by id solr_get(cli, ids = 1, "gettingstarted") solr_get(cli, ids = 2, "gettingstarted") solr_get(cli, ids = c(1, 2), "gettingstarted") solr_get(cli, ids = "1,2", "gettingstarted") # Get raw JSON solr_get(cli, ids = 1, "gettingstarted", raw = TRUE, wt = "json") solr_get(cli, ids = 1, "gettingstarted", raw = TRUE, wt = "xml") } } solrium/man/collection_deletereplicaprop.Rd0000644000176200001440000000375213176214143020735 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_deletereplicaprop.R \name{collection_deletereplicaprop} \alias{collection_deletereplicaprop} \title{Delete a replica property} \usage{ collection_deletereplicaprop(conn, name, shard, replica, property, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{shard}{(character) Required. The name of the shard the replica belongs to.} \item{replica}{(character) Required. The replica, e.g. core_node1.} \item{property}{(character) Required. The property to delete. Note: this will have the literal 'property.' prepended to distinguish it from system-maintained properties. So these two forms are equivalent: \code{property=special} and \code{property=property.special}} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Deletes an arbitrary property from a particular replica. } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("deleterep")) { conn$collection_create(name = "deleterep") # OR bin/solr create -c deleterep } # status conn$collection_clusterstatus()$cluster$collections$deleterep$shards # add the value bar to the property foo conn$collection_addreplicaprop(name = "deleterep", shard = "shard1", replica = "core_node1", property = "foo", property.value = "bar") # check status conn$collection_clusterstatus()$cluster$collections$deleterep$shards conn$collection_clusterstatus()$cluster$collections$deleterep$shards$shard1$replicas$core_node1 # delete replica property conn$collection_deletereplicaprop(name = "deleterep", shard = "shard1", replica = "core_node1", property = "foo") # check status - foo should be gone conn$collection_clusterstatus()$cluster$collections$deleterep$shards$shard1$replicas$core_node1 } } solrium/man/core_reload.Rd0000644000176200001440000000163013167507346015300 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_reload.R \name{core_reload} \alias{core_reload} \title{Reload a core} \usage{ core_reload(conn, name, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. 
Required} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Reload a core } \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # you can create a new core like: bin/solr create -c corename # where corename is the name for your core - or create as below # connect (conn <- SolrClient$new()) # Status of particular cores conn$core_reload("gettingstarted") conn$core_status("gettingstarted") } } solrium/man/config_overlay.Rd0000644000176200001440000000202013167507346016022 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/config_overlay.R \name{config_overlay} \alias{config_overlay} \title{Get Solr configuration overlay} \usage{ config_overlay(conn, name, omitHeader = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core. If not given, all cores.} \item{omitHeader}{(logical) If \code{TRUE}, omit header. Default: \code{FALSE}} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ A list with response from server } \description{ Get Solr configuration overlay } \examples{ \dontrun{ # start Solr with Cloud mode via the cloud eg: bin/solr -e cloud # you can create a new core like: bin/solr create -c corename # where corename is the name for your core - or create as below # connect (conn <- SolrClient$new()) # get config overlay conn$config_overlay("gettingstarted") # without header conn$config_overlay("gettingstarted", omitHeader = TRUE) } } solrium/man/core_requeststatus.Rd0000644000176200001440000000153213167507346016767 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_requeststatus.R \name{core_requeststatus} \alias{core_requeststatus} \title{Request status of asynchronous CoreAdmin API call} \usage{ core_requeststatus(conn, requestid, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{requestid}{The ID of the asynchronous request to check the status of. Required} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Request status of asynchronous CoreAdmin API call } \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # FIXME: not tested yet... # (conn <- SolrClient$new()) # conn$core_requeststatus(requestid = 1) } } solrium/man/schema.Rd0000644000176200001440000000374213167507346014266 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/schema.R \name{schema} \alias{schema} \title{Get the schema for a collection or core} \usage{ schema(conn, name, what = "", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) Name of a collection or core. Required.} \item{what}{(character) What to retrieve. By default, we retrieve the entire schema.
Options include: fields, dynamicfields, fieldtypes, copyfields, name, version, uniquekey, similarity, "solrqueryparser/defaultoperator"} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Get the schema for a collection or core } \examples{ \dontrun{ # start Solr, in your CLI, run: `bin/solr start -e cloud -noprompt` # after that, if you haven't run `bin/post -c gettingstarted docs/` yet, do so # connect: by default we connect to localhost, port 8983 (cli <- SolrClient$new()) # get the schema for the gettingstarted index schema(cli, name = "gettingstarted") # Get parts of the schema schema(cli, name = "gettingstarted", "fields") schema(cli, name = "gettingstarted", "dynamicfields") schema(cli, name = "gettingstarted", "fieldtypes") schema(cli, name = "gettingstarted", "copyfields") schema(cli, name = "gettingstarted", "name") schema(cli, name = "gettingstarted", "version") schema(cli, name = "gettingstarted", "uniquekey") schema(cli, name = "gettingstarted", "similarity") schema(cli, name = "gettingstarted", "solrqueryparser/defaultoperator") # get raw data schema(cli, name = "gettingstarted", "similarity", raw = TRUE) schema(cli, name = "gettingstarted", "uniquekey", raw = TRUE) # start Solr in Schemaless mode: bin/solr start -e schemaless # schema(cli, "gettingstarted") # start Solr in Standalone mode: bin/solr start # then add a core: bin/solr create -c helloWorld # schema(cli, "helloWorld") } } solrium/man/add.Rd0000644000176200001440000000505413167507346013556 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/add.R \name{add} \alias{add} \title{Add documents from R objects} \usage{ add(x, conn, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = "json", raw = FALSE, ...) } \arguments{ \item{x}{Documents, either as rows in a data.frame, or a list.} \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) A collection or core name. Required.} \item{commit}{(logical) If \code{TRUE}, documents immediately searchable. Default: \code{TRUE}} \item{commit_within}{(numeric) Milliseconds to commit the change, the document will be added within that time. Default: NULL} \item{overwrite}{(logical) Overwrite documents with matching keys. Default: \code{TRUE}} \item{boost}{(numeric) Boost factor. Default: NULL} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. 
If xml, uses \code{\link[xml2]{read_xml}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Add documents from R objects } \details{ Works for Collections as well as Cores (in SolrCloud and Standalone modes, respectively) } \examples{ \dontrun{ (cli <- SolrClient$new()) # create the books collection if (!collection_exists(cli, "books")) { collection_create(cli, name = "books", numShards = 1) } # Documents in a list ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) add(ss, cli, name = "books") cli$get(c(1, 2), "books") # Documents in a data.frame ## Simple example df <- data.frame(id = c(67, 68), price = c(1000, 500000000)) add(df, cli, "books") df <- data.frame(id = c(77, 78), price = c(1, 2.40)) add(df, cli, "books") ## More complex example, get file from package examples # start Solr in Schemaless mode first: bin/solr start -e schemaless file <- system.file("examples", "books.csv", package = "solrium") x <- read.csv(file, stringsAsFactors = FALSE) class(x) head(x) if (!collection_exists(cli, "mybooks")) { collection_create(cli, name = "mybooks", numShards = 2) } add(x, cli, "mybooks") # Use modifiers add(x, cli, "mybooks", commit_within = 5000) # Get back XML instead of a list ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) # parsed XML add(ss, cli, name = "books", wt = "xml") # raw XML add(ss, cli, name = "books", wt = "xml", raw = TRUE) } } \seealso{ \code{\link{update_json}}, \code{\link{update_xml}}, \code{\link{update_csv}} for adding documents from files } solrium/man/config_params.Rd0000644000176200001440000000401013176256017015621 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/config_params.R \name{config_params} \alias{config_params} \title{Set Solr configuration params} \usage{ config_params(conn, name, param = NULL, set = NULL, unset = NULL, update = NULL, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core. If not given, all cores.} \item{param}{(character) Name of a parameter} \item{set}{(list) List of key:value pairs of what to set. Create or overwrite a parameter set map. Default: NULL (nothing passed)} \item{unset}{(list) One or more character strings of keys to unset. Default: NULL (nothing passed)} \item{update}{(list) List of key:value pairs of what to update. Updates a parameter set map. This essentially overwrites the old parameter set, so all parameters must be sent in each update request.} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ A list with response from server } \description{ Set Solr configuration params } \details{ The Request Parameters API allows creating parameter sets that can override or take the place of parameters defined in solrconfig.xml. It is really another endpoint of the Config API instead of a separate API, and has distinct commands. It does not replace or modify any sections of solrconfig.xml, but instead provides another approach to handling parameters used in requests. It behaves in the same way as the Config API, by storing parameters in another file that will be used at runtime. In this case, the parameters are stored in a file named params.json. This file is kept in ZooKeeper or in the conf directory of a standalone Solr instance.
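As a small, hedged sketch of the removal side of this API (assuming a parameter set named "myFacets" was created as in the examples below), an existing parameter set can be dropped again by passing its name to \code{unset}:
\preformatted{
conn <- SolrClient$new()
# remove the previously created parameter set named "myFacets"
config_params(conn, "gettingstarted", unset = "myFacets")
}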
} \examples{ \dontrun{ # start Solr in standard or Cloud mode # connect (conn <- SolrClient$new()) # set a parameter set myFacets <- list(myFacets = list(facet = TRUE, facet.limit = 5)) config_params(conn, "gettingstarted", set = myFacets) # check a parameter config_params(conn, "gettingstarted", param = "myFacets") } } solrium/man/solr_parse.Rd0000644000176200001440000000270713167507346015175 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/parsers.R \name{solr_parse} \alias{solr_parse} \alias{solr_parse.sr_high} \alias{solr_parse.sr_search} \alias{solr_parse.sr_all} \alias{solr_parse.sr_mlt} \alias{solr_parse.sr_stats} \alias{solr_parse.sr_group} \title{Parse raw data from solr_search, solr_facet, or solr_highlight.} \usage{ solr_parse(input, parsetype = NULL, concat) \method{solr_parse}{sr_high}(input, parsetype = "list", concat = ",") \method{solr_parse}{sr_search}(input, parsetype = "list", concat = ",") \method{solr_parse}{sr_all}(input, parsetype = "list", concat = ",") \method{solr_parse}{sr_mlt}(input, parsetype = "list", concat = ",") \method{solr_parse}{sr_stats}(input, parsetype = "list", concat = ",") \method{solr_parse}{sr_group}(input, parsetype = "list", concat = ",") } \arguments{ \item{input}{Output from solr_facet} \item{parsetype}{One of 'list' or 'df' (data.frame)} \item{concat}{Character to concatenate strings by, e.g., ',' (character). Used in solr_parse.sr_search only.} } \description{ Parse raw data from solr_search, solr_facet, or solr_highlight. } \details{ This is the parser used internally in solr_facet, but if you output raw data from solr_facet using raw=TRUE, then you can use this function to parse that data (a sr_facet S3 object) after the fact to a list of data.frame's for easier consumption. The data format type is detected from the attribute "wt" on the sr_facet object. } solrium/man/solr_mlt.Rd0000644000176200001440000001110613176421234014645 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_mlt.r \name{solr_mlt} \alias{solr_mlt} \title{"more like this" search} \usage{ solr_mlt(conn, name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = "df", concat = ",", optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{Name of a collection or core. Or leave as \code{NULL} if not needed.} \item{params}{(list) a named list of parameters, results in a GET request as long as no body parameters given} \item{body}{(list) a named list of parameters, if given a POST request will be performed} \item{callopts}{Call options passed on to [crul::HttpClient]} \item{raw}{(logical) If TRUE, returns raw data in format specified by wt param} \item{parsetype}{(character) One of 'list' or 'df'} \item{concat}{(character) Character to concatenate elements of longer than length 1. Note that this only works reliably when data format is json (wt='json'). The parsing is more complicated in XML format, but you can do that on your own.} \item{optimizeMaxRows}{(logical) If \code{TRUE}, then rows parameter will be adjusted to the number of returned results by the same constraints. It will only be applied if rows parameter is higher than \code{minOptimizedRows}. Default: \code{TRUE}} \item{minOptimizedRows}{(numeric) used by \code{optimizeMaxRows} parameter, the minimum optimized rows.
Default: 50000} \item{...}{Further args to be combined into query} } \value{ XML, JSON, a list, or data.frame } \description{ Returns only more like this items } \section{More like this parameters}{ \itemize{ \item q Query terms, defaults to '*:*', or everything. \item fq Filter query, this does not affect the search, only what gets returned \item mlt.count The number of similar documents to return for each result. Default is 5. \item mlt.fl The fields to use for similarity. NOTE: if possible these should have a stored TermVector DEFAULT_FIELD_NAMES = new String[] {"contents"} \item mlt.mintf Minimum Term Frequency - the frequency below which terms will be ignored in the source doc. DEFAULT_MIN_TERM_FREQ = 2 \item mlt.mindf Minimum Document Frequency - the frequency at which words will be ignored which do not occur in at least this many docs. DEFAULT_MIN_DOC_FREQ = 5 \item mlt.minwl minimum word length below which words will be ignored. DEFAULT_MIN_WORD_LENGTH = 0 \item mlt.maxwl maximum word length above which words will be ignored. DEFAULT_MAX_WORD_LENGTH = 0 \item mlt.maxqt maximum number of query terms that will be included in any generated query. DEFAULT_MAX_QUERY_TERMS = 25 \item mlt.maxntp maximum number of tokens to parse in each example doc field that is not stored with TermVector support. DEFAULT_MAX_NUM_TOKENS_PARSED = 5000 \item mlt.boost [true/false] set if the query will be boosted by the interesting term relevance. DEFAULT_BOOST = false \item mlt.qf Query fields and their boosts using the same format as that used in DisMaxQParserPlugin. These fields must also be specified in mlt.fl. \item fl Fields to return. We force 'id' to be returned so that there is a unique identifier with each record. \item wt (character) Data type returned, defaults to 'json'. One of json or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[XML]{xmlParse}} to parse. csv is only supported in \code{\link{solr_search}} and \code{\link{solr_all}}. \item start Record to start at, default to beginning. \item rows Number of records to return. Defaults to 10. \item key API key, if needed. } } \examples{ \dontrun{ # connect (conn <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) # more like this search conn$mlt(params = list(q='*:*', mlt.count=2, mlt.fl='abstract', fl='score', fq="doc_type:full")) conn$mlt(params = list(q='*:*', rows=2, mlt.fl='title', mlt.mindf=1, mlt.mintf=1, fl='alm_twitterCount')) conn$mlt(params = list(q='title:"ecology" AND body:"cell"', mlt.fl='title', mlt.mindf=1, mlt.mintf=1, fl='counter_total_all', rows=5)) conn$mlt(params = list(q='ecology', mlt.fl='abstract', fl='title', rows=5)) solr_mlt(conn, params = list(q='ecology', mlt.fl='abstract', fl=c('score','eissn'), rows=5)) solr_mlt(conn, params = list(q='ecology', mlt.fl='abstract', fl=c('score','eissn'), rows=5, wt = "xml")) # get raw data, and parse later if needed out <- solr_mlt(conn, params=list(q='ecology', mlt.fl='abstract', fl='title', rows=2), raw=TRUE) solr_parse(out, "df") } } \references{ See \url{http://wiki.apache.org/solr/MoreLikeThis} for more information. 
} solrium/man/collection_reload.Rd0000644000176200001440000000136513176242547016507 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_reload.R \name{collection_reload} \alias{collection_reload} \title{Reload a collection} \usage{ collection_reload(conn, name, raw = FALSE, callopts) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Reload a collection } \examples{ \dontrun{ (conn <- SolrClient$new()) if (!conn$collection_exists("helloWorld")) { conn$collection_create(name = "helloWorld") } conn$collection_reload(name = "helloWorld") } } solrium/man/collection_removerole.Rd0000644000176200001440000000226513167507346017421 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_removerole.R \name{collection_removerole} \alias{collection_removerole} \title{Remove a role from a node} \usage{ collection_removerole(conn, role = "overseer", node, raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{role}{(character) Required. The name of the role. The only supported role as of now is overseer (set as default).} \item{node}{(character) Required. The name of the node.} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. (https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Remove an assigned role. This API is used to undo the roles assigned using \code{\link{collection_addrole}} } \examples{ \dontrun{ (conn <- SolrClient$new()) # get list of nodes nodes <- conn$collection_clusterstatus()$cluster$live_nodes conn$collection_addrole(node = nodes[1]) conn$collection_removerole(node = nodes[1]) } } solrium/man/config_get.Rd0000644000176200001440000000364613167507346015137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/config_get.R \name{config_get} \alias{config_get} \title{Get Solr configuration details} \usage{ config_get(conn, name, what = NULL, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core. If not given, all cores.} \item{what}{(character) What you want to look at. One of solrconfig or schema. Default: solrconfig} \item{wt}{(character) One of json (default) or xml. Data type returned. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[xml2]{read_xml}} to parse.} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt}} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ A list, \code{xml_document}, or character } \description{ Get Solr configuration details } \details{ Note that if \code{raw=TRUE}, \code{what} is ignored. That is, you get all the data when \code{raw=TRUE}. 
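Since \code{what} is ignored when \code{raw=TRUE}, one way to work with the raw output is to parse it yourself and drill into the piece you need. A rough sketch only, assuming a local "gettingstarted" core, the default \code{wt = "json"}, and the jsonlite package (the exact structure of the parsed list depends on your Solr version):
\preformatted{
conn <- SolrClient$new()
res <- config_get(conn, "gettingstarted", raw = TRUE)
# parse the raw JSON string, then index into the config list
parsed <- jsonlite::fromJSON(res)
parsed$config$updateHandler
}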
} \examples{ \dontrun{ # start Solr with Cloud mode via the cloud eg: bin/solr -e cloud # you can create a new core like: bin/solr create -c corename # where corename is the name for your core - or create as below # connect (conn <- SolrClient$new()) # all config settings conn$config_get("gettingstarted") # just znodeVersion conn$config_get("gettingstarted", "znodeVersion") # just luceneMatchVersion conn$config_get("gettingstarted", "luceneMatchVersion") # just updateHandler conn$config_get("gettingstarted", "updateHandler") # just requestHandler conn$config_get("gettingstarted", "requestHandler") ## Get XML conn$config_get("gettingstarted", wt = "xml") conn$config_get("gettingstarted", "updateHandler", wt = "xml") conn$config_get("gettingstarted", "requestHandler", wt = "xml") ## Raw data - what param ignored when raw=TRUE conn$config_get("gettingstarted", raw = TRUE) } } solrium/man/collection_createalias.Rd0000644000176200001440000000217613176213037017507 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_createalias.R \name{collection_createalias} \alias{collection_createalias} \title{Create an alias for a collection} \usage{ collection_createalias(conn, alias, collections, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{alias}{(character) Required. The alias name to be created} \item{collections}{(character) Required. A character vector of collections to be aliased} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \code{\link[crul]{HttpClient}}} } \description{ Create a new alias pointing to one or more collections. If an alias by the same name already exists, this action will replace the existing alias, effectively acting like an atomic "MOVE" command. } \examples{ \dontrun{ (conn <- SolrClient$new()) if (!conn$collection_exists("thingsstuff")) { conn$collection_create(name = "thingsstuff") } conn$collection_createalias("tstuff", "thingsstuff") conn$collection_clusterstatus()$cluster$collections$thingsstuff$aliases } } solrium/man/core_swap.Rd0000644000176200001440000000356313167507346015011 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_swap.R \name{core_swap} \alias{core_swap} \title{Swap a core} \usage{ core_swap(conn, name, other, async = NULL, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{other}{(character) The name of one of the cores to be swapped. Required.} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ SWAP atomically swaps the names used to access two existing Solr cores. This can be used to swap new content into production. The prior core remains available and can be swapped back, if necessary. Each core will be known by the name of the other, after the swap } \details{ Do not use \code{core_swap} with a SolrCloud node. It is not supported and can result in the core being unusable. We'll try to stop you if you try.
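As a defensive pattern (a sketch only, assuming a standalone -- not SolrCloud -- instance and the two cores used in the examples below), you can confirm both cores exist before swapping:
\preformatted{
conn <- SolrClient$new()
# swap only when both cores are present on the standalone instance
if (core_exists(conn, "swapcoretest1") && core_exists(conn, "swapcoretest2")) {
  core_swap(conn, "swapcoretest1", "swapcoretest2")
}
}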
} \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # you can create a new core like: bin/solr create -c corename # where corename is the name for your core - or create as below # connect (conn <- SolrClient$new()) # Swap a core ## First, create two cores conn$core_create("swapcoretest1") # - or create on CLI: bin/solr create -c swapcoretest1 conn$core_create("swapcoretest2") # - or create on CLI: bin/solr create -c swapcoretest2 ## check status conn$core_status("swapcoretest1", FALSE) conn$core_status("swapcoretest2", FALSE) ## swap core conn$core_swap("swapcoretest1", "swapcoretest2") ## check status again conn$core_status("swapcoretest1", FALSE) conn$core_status("swapcoretest2", FALSE) } } solrium/man/collection_clusterprop.Rd0000644000176200001440000000264413176211266017616 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_clusterprop.R \name{collection_clusterprop} \alias{collection_clusterprop} \title{Add, edit, delete a cluster-wide property} \usage{ collection_clusterprop(conn, name, val, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) Name of the core or collection} \item{val}{(character) Required. The value of the property. If the value is empty or null, the property is unset.} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Important: whether add, edit, or delete is used is determined by the value passed to the \code{val} parameter. If the property name is new, it will be added. If the property name exists, and the value is different, it will be edited. If the property name exists, and the value is \code{NULL} or empty the property is deleted (unset). } \examples{ \dontrun{ (conn <- SolrClient$new()) # add the value https to the property urlScheme collection_clusterprop(conn, name = "urlScheme", val = "https") # status again collection_clusterstatus(conn)$cluster$properties # delete the property urlScheme by setting val to NULL or a 0 length string collection_clusterprop(conn, name = "urlScheme", val = "") } } solrium/man/collection_deletereplica.Rd0000644000176200001440000000414613176214143020032 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_deletereplica.R \name{collection_deletereplica} \alias{collection_deletereplica} \title{Delete a replica} \usage{ collection_deletereplica(conn, name, shard = NULL, replica = NULL, onlyIfDown = FALSE, raw = FALSE, callopts = list(), ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) Required. The name of the collection.} \item{shard}{(character) Required. The name of the shard that includes the replica to be removed.} \item{replica}{(character) Required. The name of the replica to remove.} \item{onlyIfDown}{(logical) When \code{TRUE} will not take any action if the replica is active. Default: \code{FALSE}} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values.
(https://cwiki.apache.org/confluence/display/solr/Defining+core.properties)} } \description{ Delete a replica from a given collection and shard. If the corresponding core is up and running the core is unloaded and the entry is removed from the clusterstate. If the node/core is down, the entry is taken off the clusterstate and if the core comes up later it is automatically unregistered. } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("foobar2")) { conn$collection_create(name = "foobar2", maxShardsPerNode = 2) } # status conn$collection_clusterstatus()$cluster$collections$foobar2$shards$shard1 # add replica conn$collection_addreplica(name = "foobar2", shard = "shard1") # delete replica ## get replica name nms <- names(conn$collection_clusterstatus()$cluster$collections$foobar2$shards$shard1$replicas) conn$collection_deletereplica(name = "foobar2", shard = "shard1", replica = nms[1]) # status again conn$collection_clusterstatus()$cluster$collections$foobar2$shards$shard1 } } solrium/man/core_split.Rd0000644000176200001440000000645613167507346015176 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_split.R \name{core_split} \alias{core_split} \title{Split a core} \usage{ core_split(conn, name, path = NULL, targetCore = NULL, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{path}{(character) Two or more target directory paths in which a piece of the index will be written} \item{targetCore}{(character) Two or more target Solr cores to which a piece of the index will be merged} \item{ranges}{(character) A list of number ranges, or hash ranges in hexadecimal format. If numbers, they get converted to hexadecimal format before being passed to your Solr server.} \item{split.key}{(character) The key to be used for splitting the index} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ SPLIT splits an index into two or more indexes. The index being split can continue to handle requests. The split pieces can be placed into a specified directory on the server's filesystem or it can be merged into running Solr cores. } \details{ The core index will be split into as many pieces as the number of \code{path} or \code{targetCore} parameters. Either \code{path} or \code{targetCore} parameter must be specified but not both. The \code{ranges} and \code{split.key} parameters are optional and only one of the two should be specified, if at all required.
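The numeric-to-hexadecimal conversion mentioned above can be checked from R directly; a tiny base-R illustration (the hexadecimal strings are the form ultimately sent to Solr, matching the \code{rgs} values used in the examples below):
\preformatted{
# decimal range endpoints and their hexadecimal equivalents
format(as.hexmode(500))   # "1f4"
format(as.hexmode(1000))  # "3e8"
}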
} \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: bin/solr start -e schemaless # you can create a new core like: bin/solr create -c corename # where corename is the name for your core - or create as below # connect (conn <- SolrClient$new()) # Split a core ## First, create three cores # conn$core_create("splitcoretest0") # or create in the CLI: bin/solr create -c splitcoretest0 # conn$core_create("splitcoretest1") # or create in the CLI: bin/solr create -c splitcoretest1 # conn$core_create("splitcoretest2") # or create in the CLI: bin/solr create -c splitcoretest2 ## check status conn$core_status("splitcoretest0", FALSE) conn$core_status("splitcoretest1", FALSE) conn$core_status("splitcoretest2", FALSE) ## split core using targetCore parameter conn$core_split("splitcoretest0", targetCore = c("splitcoretest1", "splitcoretest2")) ## split core using split.key parameter ### Here all documents having the same route key as the split.key i.e. 'A!' ### will be split from the core index and written to the targetCore conn$core_split("splitcoretest0", targetCore = "splitcoretest1", split.key = "A!") ## split core using ranges parameter ### Solr expects hash ranges in hexadecimal, but since we're in R, ### let's not make our lives any harder, so you can pass in numbers ### but you can still pass in hexadecimal if you want. rgs <- c('0-1f4', '1f5-3e8') conn$core_split("splitcoretest0", targetCore = c("splitcoretest1", "splitcoretest2"), ranges = rgs) rgs <- list(c(0, 500), c(501, 1000)) conn$core_split("splitcoretest0", targetCore = c("splitcoretest1", "splitcoretest2"), ranges = rgs) } } solrium/man/solr_group.Rd0000644000176200001440000001622113176437707015212 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_group.r \name{solr_group} \alias{solr_group} \title{Grouped search} \usage{ solr_group(conn, name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = "df", concat = ",", ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{Name of a collection or core. Or leave as \code{NULL} if not needed.} \item{params}{(list) a named list of parameters, results in a GET request as long as no body parameters given} \item{body}{(list) a named list of parameters, if given a POST request will be performed} \item{callopts}{Call options passed on to [crul::HttpClient]} \item{raw}{(logical) If TRUE, returns raw data in format specified by wt param} \item{parsetype}{(character) One of 'list' or 'df'} \item{concat}{(character) Character to concatenate elements of longer than length 1. Note that this only works reliably when data format is json (wt='json'). The parsing is more complicated in XML format, but you can do that on your own.} \item{...}{Further args to be combined into query} } \value{ XML, JSON, a list, or data.frame } \description{ Returns only group items } \section{Group parameters}{ \itemize{ \item q Query terms, defaults to '*:*', or everything. \item fq Filter query, this does not affect the search, only what gets returned \item fl Fields to return \item wt (character) Data type returned, defaults to 'json'. One of json or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[XML]{xmlParse}} to parse. csv is only supported in \code{\link{solr_search}} and \code{\link{solr_all}}. \item key API key, if needed. \item group.field [fieldname] Group based on the unique values of a field.
The field must currently be single-valued and must be either indexed, or be another field type that has a value source and works in a function query - such as ExternalFileField. Note: for Solr 3.x versions the field must by a string like field such as StrField or TextField, otherwise a http status 400 is returned. \item group.func [function query] Group based on the unique values of a function query. Solr4.0 This parameter only is supported on 4.0 \item group.query [query] Return a single group of documents that also match the given query. \item rows [number] The number of groups to return. Defaults to 10. \item start [number] The offset into the list of groups. \item group.limit [number] The number of results (documents) to return for each group. Defaults to 1. \item group.offset [number] The offset into the document list of each group. \item sort How to sort the groups relative to each other. For example, sort=popularity desc will cause the groups to be sorted according to the highest popularity doc in each group. Defaults to "score desc". \item group.sort How to sort documents within a single group. Defaults to the same value as the sort parameter. \item group.format One of grouped or simple. If simple, the grouped documents are presented in a single flat list. The start and rows parameters refer to numbers of documents instead of numbers of groups. \item group.main (logical) If true, the result of the last field grouping command is used as the main result list in the response, using group.format=simple \item group.ngroups (logical) If true, includes the number of groups that have matched the query. Default is false. Solr4.1 WARNING: If this parameter is set to true on a sharded environment, all the documents that belong to the same group have to be located in the same shard, otherwise the count will be incorrect. If you are using SolrCloud, consider using "custom hashing" \item group.cache.percent [0-100] If > 0 enables grouping cache. Grouping is executed actual two searches. This option caches the second search. A value of 0 disables grouping caching. Default is 0. Tests have shown that this cache only improves search time with boolean queries, wildcard queries and fuzzy queries. 
For simple queries like a term query or a match all query this cache has a negative impact on performance } } \examples{ \dontrun{ # connect (cli <- SolrClient$new()) # by default we do a GET request cli$group("gettingstarted", params = list(q='*:*', group.field='compName_s')) # OR solr_group(cli, "gettingstarted", params = list(q='*:*', group.field='compName_s')) # connect (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) # Basic group query solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'))) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl='article_type')) # Different ways to sort (notice diff btw sort of group.sort) # note that you can only sort on a field if you return that field solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'))) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score','alm_twitterCount'), group.sort='alm_twitterCount desc')) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score','alm_twitterCount'), sort='score asc', group.sort='alm_twitterCount desc')) # Two group.field values out <- solr_group(cli, params = list(q='ecology', group.field=c('journal','article_type'), group.limit=3, fl='id'), raw=TRUE) solr_parse(out) solr_parse(out, 'df') # Get two groups, one with alm_twitterCount of 0-10, and another group # with 10 to infinity solr_group(cli, params = list(q='ecology', group.limit=3, fl=c('id','alm_twitterCount'), group.query=c('alm_twitterCount:[0 TO 10]','alm_twitterCount:[10 TO *]'))) # Use of group.format and group.simple. ## The raw data structure of these two calls are slightly different, but ## the parsing inside the function outputs the same results. You can ## of course set raw=TRUE to get back what the data actually look like solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), group.format='simple')) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), group.format='grouped')) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), group.format='grouped', group.main='true')) # xml back solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), wt = "xml")) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), wt = "xml"), parsetype = "list") res <- solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl=c('id','score'), wt = "xml"), raw = TRUE) library("xml2") xml2::read_xml(unclass(res)) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl='article_type', wt = "xml")) solr_group(cli, params = list(q='ecology', group.field='journal', group.limit=3, fl='article_type', wt = "xml"), parsetype = "list") } } \references{ See \url{http://wiki.apache.org/solr/FieldCollapsing} for more information. } \seealso{ \code{\link[=solr_highlight]{solr_highlight()}}, \code{\link[=solr_facet]{solr_facet()}} } solrium/man/collection_list.Rd0000644000176200001440000000117513176214361016204 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_list.R \name{collection_list} \alias{collection_list} \title{List collections} \usage{ collection_list(conn, raw = FALSE, ...) 
} \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ List collections } \examples{ \dontrun{ (conn <- SolrClient$new()) conn$collection_list() conn$collection_list()$collections collection_list(conn) } } solrium/man/solr_facet.Rd0000644000176200001440000004367113176437707015161 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_facet.r \name{solr_facet} \alias{solr_facet} \title{Faceted search} \usage{ solr_facet(conn, name = NULL, params = list(q = "*:*"), body = NULL, callopts = list(), raw = FALSE, parsetype = "df", concat = ",", ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{Name of a collection or core. Or leave as \code{NULL} if not needed.} \item{params}{(list) a named list of parameters, results in a GET reqeust as long as no body parameters given} \item{body}{(list) a named list of parameters, if given a POST request will be performed} \item{callopts}{Call options passed on to [crul::HttpClient]} \item{raw}{(logical) If TRUE (default) raw json or xml returned. If FALSE, parsed data returned.} \item{parsetype}{(character) One of 'list' or 'df'} \item{concat}{(character) Character to concatenate elements of longer than length 1. Note that this only works reliably when data format is json (wt='json'). The parsing is more complicated in XML format, but you can do that on your own.} \item{...}{Further args, usually per field arguments for faceting.} } \value{ Raw json or xml, or a list of length 4 parsed elements (usually data.frame's). } \description{ Returns only facet items } \details{ A number of fields can be specified multiple times, in which case you can separate them by commas, like \code{facet.field='journal,subject'}. Those fields are: \itemize{ \item facet.field \item facet.query \item facet.date \item facet.date.other \item facet.date.include \item facet.range \item facet.range.other \item facet.range.include \item facet.pivot } \strong{Options for some parameters}: \strong{facet.sort}: This param determines the ordering of the facet field constraints. \itemize{ \item {count} sort the constraints by count (highest count first) \item {index} to return the constraints sorted in their index order (lexicographic by indexed term). For terms in the ascii range, this will be alphabetically sorted. } The default is count if facet.limit is greater than 0, index otherwise. This parameter can be specified on a per field basis. \strong{facet.method}: This parameter indicates what type of algorithm/method to use when faceting a field. \itemize{ \item {enum} Enumerates all terms in a field, calculating the set intersection of documents that match the term with documents that match the query. This was the default (and only) method for faceting multi-valued fields prior to Solr 1.4. \item {fc} (Field Cache) The facet counts are calculated by iterating over documents that match the query and summing the terms that appear in each document. This was the default method for single valued fields prior to Solr 1.4. 
\item {fcs} (Field Cache per Segment) works the same as fc except the underlying cache data structure is built for each segment of the index individually }
The default value is fc (except for BoolField which uses enum) since it tends to use less memory and is faster than the enumeration method when a field has many unique terms in the index. For indexes that are changing rapidly in NRT situations, fcs may be a better choice because it reduces the overhead of building the cache structures on the first request and/or warming queries when opening a new searcher -- but tends to be somewhat slower than fc for subsequent requests against the same searcher. This parameter can be specified on a per field basis.

\strong{facet.date.other}: This param indicates that in addition to the counts for each date range constraint between facet.date.start and facet.date.end, counts should also be computed for...
\itemize{
\item {before} All records with field values lower than the lower bound of the first range
\item {after} All records with field values greater than the upper bound of the last range
\item {between} All records with field values between the start and end bounds of all ranges
\item {none} Compute none of this information
\item {all} Shortcut for before, between, and after
}
This parameter can be specified on a per field basis. In addition to the all option, this parameter can be specified multiple times to indicate multiple choices -- but none will override all other options.

\strong{facet.date.include}: By default, the ranges used to compute date faceting between facet.date.start and facet.date.end are all inclusive of both endpoints, while the "before" and "after" ranges are not inclusive. This behavior can be modified by the facet.date.include param, which can be any combination of the following options...
\itemize{
\item{lower} All gap based ranges include their lower bound
\item{upper} All gap based ranges include their upper bound
\item{edge} The first and last gap ranges include their edge bounds (ie: lower for the first one, upper for the last one) even if the corresponding upper/lower option is not specified
\item{outer} The "before" and "after" ranges will be inclusive of their bounds, even if the first or last ranges already include those boundaries.
\item{all} Shorthand for lower, upper, edge, outer
}
This parameter can be specified on a per field basis. This parameter can be specified multiple times to indicate multiple choices.

\strong{facet.range.other}: This param indicates that in addition to the counts for each range constraint between facet.range.start and facet.range.end, counts should also be computed for...
\itemize{
\item{before} All records with field values lower than the lower bound of the first range
\item{after} All records with field values greater than the upper bound of the last range
\item{between} All records with field values between the start and end bounds of all ranges
\item{none} Compute none of this information
\item{all} Shortcut for before, between, and after
}
This parameter can be specified on a per field basis. In addition to the all option, this parameter can be specified multiple times to indicate multiple choices -- but none will override all other options.

\strong{facet.range.include}: By default, the ranges used to compute range faceting between facet.range.start and facet.range.end are inclusive of their lower bounds and exclusive of the upper bounds. The "before" range is exclusive and the "after" range is inclusive.
This default, equivalent to lower below, will not result in double counting at the boundaries. This behavior can be modified by the facet.range.include param, which can be any combination of the following options... \itemize{ \item{lower} All gap based ranges include their lower bound \item{upper} All gap based ranges include their upper bound \item{edge} The first and last gap ranges include their edge bounds (ie: lower for the first one, upper for the last one) even if the corresponding upper/lower option is not specified \item{outer} The "before" and "after" ranges will be inclusive of their bounds, even if the first or last ranges already include those boundaries. \item{all} Shorthand for lower, upper, edge, outer } Can be specified on a per field basis. Can be specified multiple times to indicate multiple choices. If you want to ensure you don't double-count, don't choose both lower & upper, don't choose outer, and don't choose all. } \section{Facet parameters}{ \itemize{ \item name Name of a collection or core. Or leave as \code{NULL} if not needed. \item q Query terms. See examples. \item facet.query This param allows you to specify an arbitrary query in the Lucene default syntax to generate a facet count. By default, faceting returns a count of the unique terms for a "field", while facet.query allows you to determine counts for arbitrary terms or expressions. This parameter can be specified multiple times to indicate that multiple queries should be used as separate facet constraints. It can be particularly useful for numeric range based facets, or prefix based facets -- see example below (i.e. price:[* TO 500] and price:[501 TO *]). \item facet.field This param allows you to specify a field which should be treated as a facet. It will iterate over each Term in the field and generate a facet count using that Term as the constraint. This parameter can be specified multiple times to indicate multiple facet fields. None of the other params in this section will have any effect without specifying at least one field name using this param. \item facet.prefix Limits the terms on which to facet to those starting with the given string prefix. Note that unlike fq, this does not change the search results -- it merely reduces the facet values returned to those beginning with the specified prefix. This parameter can be specified on a per field basis. \item facet.sort See Details. \item facet.limit This param indicates the maximum number of constraint counts that should be returned for the facet fields. A negative value means unlimited. Default: 100. Can be specified on a per field basis. \item facet.offset This param indicates an offset into the list of constraints to allow paging. Default: 0. This parameter can be specified on a per field basis. \item facet.mincount This param indicates the minimum counts for facet fields should be included in the response. Default: 0. This parameter can be specified on a per field basis. \item facet.missing Set to "true" this param indicates that in addition to the Term based constraints of a facet field, a count of all matching results which have no value for the field should be computed. Default: FALSE. This parameter can be specified on a per field basis. \item facet.method See Details. \item facet.enum.cache.minDf This param indicates the minimum document frequency (number of documents matching a term) for which the filterCache should be used when determining the constraint count for that term. This is only used when facet.method=enum method of faceting. 
A value greater than zero will decrease memory usage of the filterCache, but increase the query time. When faceting on a field with a very large number of terms, and you wish to decrease memory usage, try a low value of 25 to 50 first. Default: 0, causing the filterCache to be used for all terms in the field. This parameter can be specified on a per field basis. \item facet.threads This param will cause loading the underlying fields used in faceting to be executed in parallel with the number of threads specified. Specify as facet.threads=# where # is the maximum number of threads used. Omitting this parameter or specifying the thread count as 0 will not spawn any threads just as before. Specifying a negative number of threads will spin up to Integer.MAX_VALUE threads. Currently this is limited to the fields, range and query facets are not yet supported. In at least one case this has reduced warmup times from 20 seconds to under 5 seconds. \item facet.date Specify names of fields (of type DateField) which should be treated as date facets. Can be specified multiple times to indicate multiple date facet fields. \item facet.date.start The lower bound for the first date range for all Date Faceting on this field. This should be a single date expression which may use the DateMathParser syntax. Can be specified on a per field basis. \item facet.date.end The minimum upper bound for the last date range for all Date Faceting on this field (see facet.date.hardend for an explanation of what the actual end value may be greater). This should be a single date expression which may use the DateMathParser syntax. Can be specified on a per field basis. \item facet.date.gap The size of each date range expressed as an interval to be added to the lower bound using the DateMathParser syntax. Eg: facet.date.gap=+1DAY. Can be specified on a per field basis. \item facet.date.hardend A Boolean parameter instructing Solr what to do in the event that facet.date.gap does not divide evenly between facet.date.start and facet.date.end. If this is true, the last date range constraint will have an upper bound of facet.date.end; if false, the last date range will have the smallest possible upper bound greater then facet.date.end such that the range is exactly facet.date.gap wide. Default: FALSE. This parameter can be specified on a per field basis. \item facet.date.other See Details. \item facet.date.include See Details. \item facet.range Indicates what field to create range facets for. Example: facet.range=price&facet.range=age \item facet.range.start The lower bound of the ranges. Can be specified on a per field basis. Example: f.price.facet.range.start=0.0&f.age.facet.range.start=10 \item facet.range.end The upper bound of the ranges. Can be specified on a per field basis. Example: f.price.facet.range.end=1000.0&f.age.facet.range.start=99 \item facet.range.gap The size of each range expressed as a value to be added to the lower bound. For date fields, this should be expressed using the DateMathParser syntax. (ie: facet.range.gap=+1DAY). Can be specified on a per field basis. Example: f.price.facet.range.gap=100&f.age.facet.range.gap=10 \item facet.range.hardend A Boolean parameter instructing Solr what to do in the event that facet.range.gap does not divide evenly between facet.range.start and facet.range.end. 
If this is true, the last range constraint will have an upper bound of facet.range.end; if false, the last range will have the smallest possible upper bound greater then facet.range.end such that the range is exactly facet.range.gap wide. Default: FALSE. This parameter can be specified on a per field basis. \item facet.range.other See Details. \item facet.range.include See Details. \item facet.pivot This param allows you to specify a single comma-separated string of fields to allow you to facet within the results of the parent facet to return counts in the format of SQL group by operation \item facet.pivot.mincount This param indicates the minimum counts for facet fields to be included in the response. Default: 0. This parameter should only be specified once. \item start Record to start at, default to beginning. \item rows Number of records to return. \item key API key, if needed. \item wt (character) Data type returned, defaults to 'json'. One of json or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[XML]{xmlParse}} to parse. csv is only supported in \code{\link{solr_search}} and \code{\link{solr_all}}. } } \examples{ \dontrun{ # connect - local Solr instance (cli <- SolrClient$new()) cli$facet("gettingstarted", params = list(q="*:*", facet.field='name')) cli$facet("gettingstarted", params = list(q="*:*", facet.field='name'), callopts = list(verbose = TRUE)) cli$facet("gettingstarted", body = list(q="*:*", facet.field='name'), callopts = list(verbose = TRUE)) # Facet on a single field solr_facet(cli, "gettingstarted", params = list(q='*:*', facet.field='name')) # Remote instance (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) # Facet on multiple fields solr_facet(cli, params = list(q='alcohol', facet.field = c('journal','subject'))) # Using mincount solr_facet(cli, params = list(q='alcohol', facet.field='journal', facet.mincount='500')) # Using facet.query to get counts solr_facet(cli, params = list(q='*:*', facet.field='journal', facet.query=c('cell','bird'))) # Using facet.pivot to simulate SQL group by counts solr_facet(cli, params = list(q='alcohol', facet.pivot='journal,subject', facet.pivot.mincount=10)) ## two or more fields are required - you can pass in as a single ## character string solr_facet(cli, params = list(q='*:*', facet.pivot = "journal,subject", facet.limit = 3)) ## Or, pass in as a vector of length 2 or greater solr_facet(cli, params = list(q='*:*', facet.pivot = c("journal", "subject"), facet.limit = 3)) # Date faceting solr_facet(cli, params = list(q='*:*', facet.date='publication_date', facet.date.start='NOW/DAY-5DAYS', facet.date.end='NOW', facet.date.gap='+1DAY')) ## two variables solr_facet(cli, params = list(q='*:*', facet.date=c('publication_date', 'timestamp'), facet.date.start='NOW/DAY-5DAYS', facet.date.end='NOW', facet.date.gap='+1DAY')) # Range faceting solr_facet(cli, params = list(q='*:*', facet.range='counter_total_all', facet.range.start=5, facet.range.end=1000, facet.range.gap=10)) # Range faceting with > 1 field, same settings solr_facet(cli, params = list(q='*:*', facet.range=c('counter_total_all','alm_twitterCount'), facet.range.start=5, facet.range.end=1000, facet.range.gap=10)) # Range faceting with > 1 field, different settings solr_facet(cli, params = list(q='*:*', facet.range=c('counter_total_all','alm_twitterCount'), f.counter_total_all.facet.range.start=5, f.counter_total_all.facet.range.end=1000, f.counter_total_all.facet.range.gap=10, 
f.alm_twitterCount.facet.range.start=5, f.alm_twitterCount.facet.range.end=1000, f.alm_twitterCount.facet.range.gap=10)) # Get raw json or xml ## json solr_facet(cli, params = list(q='*:*', facet.field='journal'), raw=TRUE) ## xml solr_facet(cli, params = list(q='*:*', facet.field='journal', wt='xml'), raw=TRUE) # Get raw data back, and parse later, same as what goes on internally if # raw=FALSE (Default) out <- solr_facet(cli, params = list(q='*:*', facet.field='journal'), raw=TRUE) solr_parse(out) out <- solr_facet(cli, params = list(q='*:*', facet.field='journal', wt = 'xml'), raw=TRUE) solr_parse(out) # Using the USGS BISON API (https://bison.usgs.gov/#solr) ## The occurrence endpoint (cli <- SolrClient$new(host = "bison.usgs.gov", scheme = "https", path = "solr/occurrences/select", port = NULL)) solr_facet(cli, params = list(q='*:*', facet.field='year')) solr_facet(cli, params = list(q='*:*', facet.field='computedStateFips')) # using a proxy # cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL, # proxy = list(url = "http://54.195.48.153:8888")) # solr_facet(cli, params = list(facet.field='journal'), # callopts=list(verbose=TRUE)) } } \references{ See \url{http://wiki.apache.org/solr/SimpleFacetParameters} for more information on faceting. } \seealso{ \code{\link[=solr_search]{solr_search()}}, \code{\link[=solr_highlight]{solr_highlight()}}, \code{\link[=solr_parse]{solr_parse()}} } solrium/man/update_json.Rd0000644000176200001440000000544713176206202015332 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/update_json.R \name{update_json} \alias{update_json} \title{Update documents with JSON data} \usage{ update_json(conn, files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{files}{Path to a single file to load into Solr} \item{name}{(character) Name of the core or collection} \item{commit}{(logical) If \code{TRUE}, documents immediately searchable. Deafult: \code{TRUE}} \item{optimize}{Should index optimization be performed before the method returns. Default: \code{FALSE}} \item{max_segments}{optimizes down to at most this number of segments. Default: 1} \item{expunge_deletes}{merge segments with deletes away. Default: \code{FALSE}} \item{wait_searcher}{block until a new searcher is opened and registered as the main query searcher, making the changes visible. Default: \code{TRUE}} \item{soft_commit}{perform a soft commit - this will refresh the 'view' of the index in a more performant manner, but without "on-disk" guarantees. Default: \code{FALSE}} \item{prepare_commit}{The prepareCommit command is an expert-level API that calls Lucene's IndexWriter.prepareCommit(). Not passed by default} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[xml2]{read_xml}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \code{\link[crul]{HttpClient}}} } \description{ Update documents with JSON data } \details{ You likely may not be able to run this function against many public Solr services, but should work locally. 
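As a rough sketch of the round trip (the file path, collection name, and document fields below are purely illustrative), an update-commands file can be built from R and then loaded:

\preformatted{
# build a minimal JSON add command, write it to a temp file, and load it
doc <- list(add = list(doc = list(id = "999", name = "A new book")))
path <- tempfile(fileext = ".json")
writeLines(jsonlite::toJSON(doc, auto_unbox = TRUE), path)
# conn <- SolrClient$new()
# conn$update_json(files = path, name = "books")
}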
} \examples{ \dontrun{ # start Solr: bin/solr start -f -c -p 8983 # connect (conn <- SolrClient$new()) # Add documents file <- system.file("examples", "books2.json", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_json(files = file, name = "books") update_json(conn, files = file, name = "books") # Update commands - can include many varying commands ## Add file file <- system.file("examples", "updatecommands_add.json", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_json(file, "books") ## Delete file file <- system.file("examples", "updatecommands_delete.json", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_json(file, "books") # Add and delete in the same document ## Add a document first, that we can later delete ss <- list(list(id = 456, name = "cat")) conn$add(ss, "books") } } \seealso{ Other update: \code{\link{update_csv}}, \code{\link{update_xml}} } solrium/man/solrium-package.Rd0000644000176200001440000000465713167507346016121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solrium-package.R \docType{package} \name{solrium-package} \alias{solrium-package} \alias{solrium} \title{General purpose R interface to Solr.} \description{ This package has support for all the search endpoints, as well as a suite of functions for managing a Solr database, including adding and deleting documents. } \section{Important search functions}{ \itemize{ \item \code{\link{solr_search}} - General search, only returns documents \item \code{\link{solr_all}} - General search, including all non-documents in addition to documents: facets, highlights, groups, mlt, stats. \item \code{\link{solr_facet}} - Faceting only (w/o general search) \item \code{\link{solr_highlight}} - Highlighting only (w/o general search) \item \code{\link{solr_mlt}} - More like this (w/o general search) \item \code{\link{solr_group}} - Group search (w/o general search) \item \code{\link{solr_stats}} - Stats search (w/o general search) } } \section{Important Solr management functions}{ \itemize{ \item \code{\link{update_json}} - Add or delete documents using json in a file \item \code{\link{add}} - Add documents via an R list or data.frame \item \code{\link{delete_by_id}} - Delete documents by ID \item \code{\link{delete_by_query}} - Delete documents by query } } \section{Vignettes}{ See the vignettes for help \code{browseVignettes(package = "solrium")} } \section{Performance}{ \code{v0.2} and above of this package will have \code{wt=csv} as the default. This should give significant performance improvement over the previous default of \code{wt=json}, which pulled down json, parsed to an R list, then to a data.frame. With \code{wt=csv}, we pull down csv, and read that in directly to a data.frame. The http library we use, \pkg{crul}, sets gzip compression header by default. As long as compression is used server side, you're good to go on compression, which should be a good peformance boost. See \url{https://wiki.apache.org/solr/SolrPerformanceFactors#Query_Response_Compression} for notes on how to enable compression. There are other notes about Solr performance at \url{https://wiki.apache.org/solr/SolrPerformanceFactors} that can be used server side/in your Solr config, but aren't things to tune here in this R client. Let us know if there's any further performance improvements we can make. 
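As an illustrative, un-benchmarked sketch (the collection name below is just an example), you can compare response writers on your own installation by timing the same query with the default writer and with \code{wt = "json"}:

\preformatted{
# time the same query with the default response writer vs. wt = "json"
# conn <- SolrClient$new()
# system.time(solr_search(conn, "gettingstarted",
#   params = list(q = "*:*", rows = 10000)))
# system.time(solr_search(conn, "gettingstarted",
#   params = list(q = "*:*", rows = 10000, wt = "json")))
}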
}
\author{
Scott Chamberlain \email{myrmecocystus@gmail.com}
}
\keyword{package}
solrium/man/collection_addrole.Rd0000644000176200001440000000256513176211125016642 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collection_addrole.R
\name{collection_addrole}
\alias{collection_addrole}
\title{Add a role to a node}
\usage{
collection_addrole(conn, role = "overseer", node, raw = FALSE, ...)
}
\arguments{
\item{conn}{A solrium connection object, see \link{SolrClient}}
\item{role}{(character) Required. The name of the role. The only supported role as of now is overseer (set as default).}
\item{node}{(character) Required. The name of the node. It is possible to assign a role even before that node is started.}
\item{raw}{(logical) If \code{TRUE}, returns raw data}
\item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}}
}
\description{
Assign a role to a given node in the cluster. The only supported role as of Solr 4.7 is 'overseer'. Use this API to dedicate a particular node as Overseer. Invoke it multiple times to add more nodes. This is useful in large clusters where an Overseer is likely to get overloaded. If available, one among the list of nodes which are assigned the 'overseer' role would become the overseer. The system would assign the role to any other node if none of the designated nodes are up and running.
}
\examples{
\dontrun{
(conn <- SolrClient$new())
# get list of nodes
nodes <- conn$collection_clusterstatus()$cluster$live_nodes
collection_addrole(conn, node = nodes[1])
}
}
solrium/man/core_status.Rd0000644000176200001440000000216113167507346015355 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/core_status.R
\name{core_status}
\alias{core_status}
\title{Get core status}
\usage{
core_status(conn, name = NULL, indexInfo = TRUE, raw = FALSE, callopts = list())
}
\arguments{
\item{conn}{A solrium connection object, see \link{SolrClient}}
\item{name}{(character) The name of the core whose status is requested. If not given, the status of all cores is returned.}
\item{indexInfo}{(logical) Whether to include index information in the response. Default: \code{TRUE}}
\item{raw}{(logical) If \code{TRUE}, returns raw data}
\item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}}
}
\description{
Get core status
}
\examples{
\dontrun{
# start Solr with Schemaless mode via the schemaless eg:
# bin/solr start -e schemaless
# you can create a new core like: bin/solr create -c corename
# where corename is the name for your core - or create as below

# connect
(conn <- SolrClient$new())

# Status of all cores
conn$core_status()

# Status of particular cores
conn$core_status("gettingstarted")

# Get index info or not
## Default: TRUE
conn$core_status("gettingstarted", indexInfo = TRUE)
conn$core_status("gettingstarted", indexInfo = FALSE)
}
}
solrium/man/collection_balanceshardunique.Rd0000644000176200001440000000412113176211266021062 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collection_balanceshardunique.R
\name{collection_balanceshardunique}
\alias{collection_balanceshardunique}
\title{Balance a property}
\usage{
collection_balanceshardunique(conn, name, property, onlyactivenodes = TRUE, shardUnique = NULL, raw = FALSE, ...)
}
\arguments{
\item{conn}{A solrium connection object, see \link{SolrClient}}
\item{name}{(character) The name of the collection to balance the property across. Required}
\item{property}{(character) Required. The property to balance. The literal "property."
is prepended to this property if not specified explicitly.} \item{onlyactivenodes}{(logical) Normally, the property is instantiated on active nodes only. If \code{FALSE}, then inactive nodes are also included for distribution. Default: \code{TRUE}} \item{shardUnique}{(logical) Something of a safety valve. There is one pre-defined property (preferredLeader) that defaults this value to \code{TRUE}. For all other properties that are balanced, this must be set to \code{TRUE} or an error message is returned} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. (https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Insures that a particular property is distributed evenly amongst the physical nodes that make up a collection. If the property already exists on a replica, every effort is made to leave it there. If the property is not on any replica on a shard one is chosen and the property is added. } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("addrep")) { conn$collection_create(name = "mycollection") # OR: bin/solr create -c mycollection } # balance preferredLeader property conn$collection_balanceshardunique("mycollection", property = "preferredLeader") # examine cluster status conn$collection_clusterstatus()$cluster$collections$mycollection } } solrium/man/update_xml.Rd0000644000176200001440000000602213176206332015153 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/update_xml.R \name{update_xml} \alias{update_xml} \title{Update documents with XML data} \usage{ update_xml(conn, files, name, commit = TRUE, optimize = FALSE, max_segments = 1, expunge_deletes = FALSE, wait_searcher = TRUE, soft_commit = FALSE, prepare_commit = NULL, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{files}{Path to a single file to load into Solr} \item{name}{(character) Name of the core or collection} \item{commit}{(logical) If \code{TRUE}, documents immediately searchable. Deafult: \code{TRUE}} \item{optimize}{Should index optimization be performed before the method returns. Default: \code{FALSE}} \item{max_segments}{optimizes down to at most this number of segments. Default: 1} \item{expunge_deletes}{merge segments with deletes away. Default: \code{FALSE}} \item{wait_searcher}{block until a new searcher is opened and registered as the main query searcher, making the changes visible. Default: \code{TRUE}} \item{soft_commit}{perform a soft commit - this will refresh the 'view' of the index in a more performant manner, but without "on-disk" guarantees. Default: \code{FALSE}} \item{prepare_commit}{The prepareCommit command is an expert-level API that calls Lucene's IndexWriter.prepareCommit(). Not passed by default} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[xml2]{read_xml}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \code{\link[crul]{HttpClient}}} } \description{ Update documents with XML data } \details{ You likely may not be able to run this function against many public Solr services, but should work locally. 
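As a rough sketch (the id value and collection name below are illustrative), an XML update command can likewise be written from R and then loaded:

\preformatted{
# write a minimal XML delete command to a temp file and load it
xml <- '<delete><id>456</id></delete>'
path <- tempfile(fileext = ".xml")
writeLines(xml, path)
# conn <- SolrClient$new()
# conn$update_xml(path, "books")
}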
} \examples{ \dontrun{ # start Solr: bin/solr start -f -c -p 8983 # connect (conn <- SolrClient$new()) # create a collection if (!conn$collection_exists("books")) { conn$collection_create(name = "books", numShards = 2) } # Add documents file <- system.file("examples", "books.xml", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_xml(file, "books") # Update commands - can include many varying commands ## Add files file <- system.file("examples", "books2_delete.xml", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_xml(file, "books") ## Delete files file <- system.file("examples", "updatecommands_delete.xml", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_xml(file, "books") ## Add and delete in the same document ## Add a document first, that we can later delete ss <- list(list(id = 456, name = "cat")) conn$add(ss, "books") ## Now add a new document, and delete the one we just made file <- system.file("examples", "add_delete.xml", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_xml(file, "books") } } \seealso{ Other update: \code{\link{update_csv}}, \code{\link{update_json}} } solrium/man/collection_rebalanceleaders.Rd0000644000176200001440000000411413176240630020477 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_rebalanceleaders.R \name{collection_rebalanceleaders} \alias{collection_rebalanceleaders} \title{Rebalance leaders} \usage{ collection_rebalanceleaders(conn, name, maxAtOnce = NULL, maxWaitSeconds = NULL, raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{maxAtOnce}{(integer) The maximum number of reassignments to have queue up at once. Values <=0 are use the default value Integer.MAX_VALUE. When this number is reached, the process waits for one or more leaders to be successfully assigned before adding more to the queue.} \item{maxWaitSeconds}{(integer) Timeout value when waiting for leaders to be reassigned. NOTE: if maxAtOnce is less than the number of reassignments that will take place, this is the maximum interval that any single wait for at least one reassignment. For example, if 10 reassignments are to take place and maxAtOnce is 1 and maxWaitSeconds is 60, the upper bound on the time that the command may wait is 10 minutes. Default: 60} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. 
(https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Reassign leaders in a collection according to the preferredLeader property across active nodes } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("mycollection2")) { conn$collection_create(name = "mycollection2") # OR: bin/solr create -c mycollection2 } # balance preferredLeader property conn$collection_balanceshardunique("mycollection2", property = "preferredLeader") # balance preferredLeader property conn$collection_rebalanceleaders("mycollection2") # examine cluster status conn$collection_clusterstatus()$cluster$collections$mycollection2 } } solrium/man/is-sr.Rd0000644000176200001440000000057013141431267014047 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/classes.r \name{is.sr_facet} \alias{is.sr_facet} \alias{is.sr_high} \alias{is.sr_search} \title{Test for sr_facet class} \usage{ is.sr_facet(x) is.sr_high(x) is.sr_search(x) } \arguments{ \item{x}{Input} } \description{ Test for sr_facet class Test for sr_high class Test for sr_search class } solrium/man/collection_clusterstatus.Rd0000644000176200001440000000241713167507346020166 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_clusterstatus.R \name{collection_clusterstatus} \alias{collection_clusterstatus} \title{Get cluster status} \usage{ collection_clusterstatus(conn, name = NULL, shard = NULL, raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{shard}{(character) The shard(s) for which information is requested. Multiple shard names can be specified as a character vector.} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. (https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Fetch the cluster status including collections, shards, replicas, configuration name as well as collection aliases and cluster properties. } \examples{ \dontrun{ (conn <- SolrClient$new()) conn$collection_clusterstatus() res <- conn$collection_clusterstatus() res$responseHeader res$cluster res$cluster$collections res$cluster$collections$gettingstarted res$cluster$live_nodes } } solrium/man/core_unload.Rd0000644000176200001440000000300513167507346015312 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_unload.R \name{core_unload} \alias{core_unload} \title{Unload (delete) a core} \usage{ core_unload(conn, name, deleteIndex = FALSE, deleteDataDir = FALSE, deleteInstanceDir = FALSE, async = NULL, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{deleteIndex}{(logical) If \code{TRUE}, will remove the index when unloading the core. Default: \code{FALSE}} \item{deleteDataDir}{(logical) If \code{TRUE}, removes the data directory and all sub-directories. Default: \code{FALSE}} \item{deleteInstanceDir}{(logical) If \code{TRUE}, removes everything related to the core, including the index directory, configuration files and other related files. 
Default: \code{FALSE}} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Unload (delete) a core } \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # connect (conn <- SolrClient$new()) # Create a core conn$core_create(name = "books") # Unload a core conn$core_unload(name = "books") ## not found # conn$core_unload(name = "books") # > Error: 400 - Cannot unload non-existent core [books] } } solrium/man/core_mergeindexes.Rd0000644000176200001440000000313413167507346016512 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_mergeindexes.R \name{core_mergeindexes} \alias{core_mergeindexes} \title{Merge indexes (cores)} \usage{ core_mergeindexes(conn, name, indexDir = NULL, srcCore = NULL, async = NULL, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{indexDir}{(character) Multi-valued, directories that would be merged.} \item{srcCore}{(character) Multi-valued, source cores that would be merged.} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Merges one or more indexes to another index. The indexes must have completed commits, and should be locked against writes until the merge is complete or the resulting merged index may become corrupted. The target core index must already exist and have a compatible schema with the one or more indexes that will be merged to it. } \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # connect (conn <- SolrClient$new()) ## FIXME: not tested yet # use indexDir parameter conn$core_mergeindexes(core="new_core_name", indexDir = c("/solr_home/core1/data/index", "/solr_home/core2/data/index")) # use srcCore parameter conn$core_mergeindexes(name = "new_core_name", srcCore = c('core1', 'core2')) } } solrium/man/collection_exists.Rd0000644000176200001440000000175113167507346016560 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_exists.R \name{collection_exists} \alias{collection_exists} \title{Check if a collection exists} \usage{ collection_exists(conn, name, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core. 
If not given, all cores.} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ A single boolean, \code{TRUE} or \code{FALSE} } \description{ Check if a collection exists } \details{ Simply calls \code{\link[=collection_list]{collection_list()}} internally } \examples{ \dontrun{ # start Solr with Cloud mode via the schemaless eg: bin/solr -e cloud # you can create a new core like: bin/solr create -c corename # where is the name for your core - or creaate as below (conn <- SolrClient$new()) # exists conn$collection_exists("gettingstarted") # doesn't exist conn$collection_exists("hhhhhh") } } solrium/man/collections.Rd0000644000176200001440000000141413167507346015340 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collections.R \name{collections} \alias{collections} \alias{cores} \title{List collections or cores} \usage{ collections(conn, ...) cores(conn, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ character vector } \description{ List collections or cores } \details{ Calls \code{\link[=collection_list]{collection_list()}} or \code{\link[=core_status]{core_status()}} internally, and parses out names for you. } \examples{ \dontrun{ # connect (conn <- SolrClient$new()) # list collections conn$collection_list() collections(conn) # list cores conn$core_status() cores(conn) } } solrium/man/collection_addreplica.Rd0000644000176200001440000000447513176210766017334 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_addreplica.R \name{collection_addreplica} \alias{collection_addreplica} \title{Add a replica} \usage{ collection_addreplica(conn, name, shard = NULL, route = NULL, node = NULL, instanceDir = NULL, dataDir = NULL, async = NULL, raw = FALSE, callopts = list(), ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{shard}{(character) The name of the shard to which replica is to be added. If \code{shard} is not given, then \code{route} must be.} \item{route}{(character) If the exact shard name is not known, users may pass the \code{route} value and the system would identify the name of the shard. Ignored if the \code{shard} param is also given} \item{node}{(character) The name of the node where the replica should be created} \item{instanceDir}{(character) The instanceDir for the core that will be created} \item{dataDir}{(character) The directory in which the core should be created} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. (https://cwiki.apache.org/confluence/display/solr/Defining+core.properties)} } \description{ Add a replica to a shard in a collection. 
The node name can be specified if the replica is to be created in a specific node } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("foobar")) { conn$collection_create(name = "foobar", numShards = 2) # OR bin/solr create -c foobar } # status conn$collection_clusterstatus()$cluster$collections$foobar # add replica if (!conn$collection_exists("foobar")) { conn$collection_addreplica(name = "foobar", shard = "shard1") } # status again conn$collection_clusterstatus()$cluster$collections$foobar conn$collection_clusterstatus()$cluster$collections$foobar$shards conn$collection_clusterstatus()$cluster$collections$foobar$shards$shard1 } } solrium/man/core_exists.Rd0000644000176200001440000000174013167507346015353 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_exists.R \name{core_exists} \alias{core_exists} \title{Check if a core exists} \usage{ core_exists(conn, name, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ A single boolean, \code{TRUE} or \code{FALSE} } \description{ Check if a core exists } \details{ Simply calls \code{\link[=core_status]{core_status()}} internally } \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # you can create a new core like: bin/solr create -c corename # where is the name for your core - or create as below # connect (conn <- SolrClient$new()) # exists conn$core_exists("gettingstarted") # doesn't exist conn$core_exists("hhhhhh") } } solrium/man/collection_requeststatus.Rd0000644000176200001440000000212213176242365020163 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_requeststatus.R \name{collection_requeststatus} \alias{collection_requeststatus} \title{Get request status} \usage{ collection_requeststatus(conn, requestid, raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{requestid}{(character) Required. The user defined request-id for the request. This can be used to track the status of the submitted asynchronous task. \code{-1} is a special request id which is used to cleanup the stored states for all of the already completed/failed tasks.} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. (https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Request the status of an already submitted Asynchronous Collection API call. This call is also used to clear up the stored statuses. } solrium/man/solr_optimize.Rd0000644000176200001440000000304713167507346015725 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/optimize.R \name{solr_optimize} \alias{solr_optimize} \title{Optimize} \usage{ solr_optimize(conn, name, max_segments = 1, wait_searcher = TRUE, soft_commit = FALSE, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) A collection or core name. Required.} \item{max_segments}{optimizes down to at most this number of segments. 
Default: 1} \item{wait_searcher}{block until a new searcher is opened and registered as the main query searcher, making the changes visible. Default: \code{TRUE}} \item{soft_commit}{perform a soft commit - this will refresh the 'view' of the index in a more performant manner, but without "on-disk" guarantees. Default: \code{FALSE}} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}} to parse. If xml, uses \code{\link[xml2:read_xml]{xml2::read_xml()}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Optimize } \examples{ \dontrun{ (conn <- SolrClient$new()) solr_optimize(conn, "gettingstarted") solr_optimize(conn, "gettingstarted", max_segments = 2) solr_optimize(conn, "gettingstarted", wait_searcher = FALSE) # get xml back solr_optimize(conn, "gettingstarted", wt = "xml") ## raw xml solr_optimize(conn, "gettingstarted", wt = "xml", raw = TRUE) } } solrium/man/collection_migrate.Rd0000644000176200001440000000364313176214600016657 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_migrate.R \name{collection_migrate} \alias{collection_migrate} \title{Migrate documents to another collection} \usage{ collection_migrate(conn, name, target.collection, split.key, forward.timeout = NULL, async = NULL, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be created. Required} \item{target.collection}{(character) Required. The name of the target collection to which documents will be migrated} \item{split.key}{(character) Required. The routing key prefix. For example, if uniqueKey is a!123, then you would use split.key=a!} \item{forward.timeout}{(integer) The timeout (seconds), until which write requests made to the source collection for the given \code{split.key} will be forwarded to the target shard. Default: 60} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Migrate documents to another collection } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("migrate_from")) { conn$collection_create(name = "migrate_from") # OR: bin/solr create -c migrate_from } # create another collection if (!conn$collection_exists("migrate_to")) { conn$collection_create(name = "migrate_to") # OR bin/solr create -c migrate_to } # add some documents file <- system.file("examples", "books.csv", package = "solrium") x <- read.csv(file, stringsAsFactors = FALSE) conn$add(x, "migrate_from") # migrate some documents from one collection to the other ## FIXME - not sure if this is actually working.... # conn$collection_migrate("migrate_from", "migrate_to", split.key = "05535") } } solrium/man/solr_highlight.Rd0000644000176200001440000002402013176440340016014 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_highlight.r \name{solr_highlight} \alias{solr_highlight} \title{Highlighting search} \usage{ solr_highlight(conn, name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = "df", ...) 
}
\arguments{
\item{conn}{A solrium connection object, see \link{SolrClient}}
\item{name}{Name of a collection or core. Or leave as \code{NULL} if not needed.}
\item{params}{(list) a named list of parameters, results in a GET request as long as no body parameters given}
\item{body}{(list) a named list of parameters, if given a POST request will be performed}
\item{callopts}{Call options passed on to [crul::HttpClient]}
\item{raw}{(logical) If TRUE, raw json or xml returned. If FALSE (default), parsed data returned.}
\item{parsetype}{One of list or df (data.frame)}
\item{...}{Further args to be combined into query}
}
\value{
XML, JSON, a list, or data.frame
}
\description{
Returns only highlight items
}
\section{Highlighting parameters}{
\itemize{
\item q Query terms. See examples.
\item hl.fl A comma-separated list of fields for which to generate highlighted snippets. If left blank, the fields highlighted for the LuceneQParser are the defaultSearchField (or the df param if used) and for the DisMax parser the qf fields are used. A '*' can be used to match field globs, e.g. 'text_*' or even '*' to highlight on all fields where highlighting is possible. When using '*', consider adding hl.requireFieldMatch=TRUE.
\item hl.snippets Max no. of highlighted snippets to generate per field. Note: it is possible for any number of snippets from zero to this value to be generated. This parameter accepts per-field overrides. Default: 1.
\item hl.fragsize The size, in characters, of the snippets (aka fragments) created by the highlighter. In the original Highlighter, "0" indicates that the whole field value should be used with no fragmenting. See \url{http://wiki.apache.org/solr/HighlightingParameters} for more info.
\item hl.q Set a query request to be highlighted. It overrides the q parameter for highlighting. Solr query syntax is acceptable for this parameter.
\item hl.mergeContiguous Collapse contiguous fragments into a single fragment. "true" indicates contiguous fragments will be collapsed into a single fragment. This parameter accepts per-field overrides. This parameter makes sense for the original Highlighter only. Default: FALSE.
\item hl.requireFieldMatch If TRUE, then a field will only be highlighted if the query matched in this particular field (normally, terms are highlighted in all requested fields regardless of which field matched the query). This only takes effect if "hl.usePhraseHighlighter" is TRUE. Default: FALSE.
\item hl.maxAnalyzedChars How many characters into a document to look for suitable snippets. This parameter makes sense for the original Highlighter only. Default: 51200. You can assign a large value to this parameter and use hl.fragsize=0 to return highlighting in large fields that have size greater than 51200 characters.
\item hl.alternateField If a snippet cannot be generated (due to no terms matching), you can specify a field to use as the fallback. This parameter accepts per-field overrides.
\item hl.maxAlternateFieldLength If hl.alternateField is specified, this parameter specifies the maximum number of characters of the field to return. Any value less than or equal to 0 means unlimited. Default: unlimited.
\item hl.preserveMulti Preserve order of values in a multiValued list. Default: FALSE.
\item hl.maxMultiValuedToExamine When highlighting a multiValued field, stop examining the individual entries after looking at this many of them. Will potentially return 0 snippets if this limit is reached before any snippets are found.
If maxMultiValuedToMatch is also specified, whichever limit is hit first will terminate looking for more. Default: Integer.MAX_VALUE \item hl.maxMultiValuedToMatch When highlighting a multiValued field, stop examining the individual entries after looking at this many matches are found. If maxMultiValuedToExamine is also specified, whichever limit is hit first will terminate looking for more. Default: Integer.MAX_VALUE \item hl.formatter Specify a formatter for the highlight output. Currently the only legal value is "simple", which surrounds a highlighted term with a customizable pre- and post text snippet. This parameter accepts per-field overrides. This parameter makes sense for the original Highlighter only. \item hl.simple.pre The text which appears before and after a highlighted term when using the simple formatter. This parameter accepts per-field overrides. The default values are "" and "" This parameter makes sense for the original Highlighter only. Use hl.tag.pre and hl.tag.post for FastVectorHighlighter (see example under hl.fragmentsBuilder) \item hl.simple.post The text which appears before and after a highlighted term when using the simple formatter. This parameter accepts per-field overrides. The default values are "" and "" This parameter makes sense for the original Highlighter only. Use hl.tag.pre and hl.tag.post for FastVectorHighlighter (see example under hl.fragmentsBuilder) \item hl.fragmenter Specify a text snippet generator for highlighted text. The standard fragmenter is gap (which is so called because it creates fixed-sized fragments with gaps for multi-valued fields). Another option is regex, which tries to create fragments that "look like" a certain regular expression. This parameter accepts per-field overrides. Default: "gap" \item hl.fragListBuilder Specify the name of SolrFragListBuilder. This parameter makes sense for FastVectorHighlighter only. To create a fragSize=0 with the FastVectorHighlighter, use the SingleFragListBuilder. This field supports per-field overrides. \item hl.fragmentsBuilder Specify the name of SolrFragmentsBuilder. This parameter makes sense for FastVectorHighlighter only. \item hl.boundaryScanner Configures how the boundaries of fragments are determined. By default, boundaries will split at the character level, creating a fragment such as "uick brown fox jumps over the la". Valid entries are breakIterator or simple, with breakIterator being the most commonly used. This parameter makes sense for FastVectorHighlighter only. \item hl.bs.maxScan Specify the length of characters to be scanned by SimpleBoundaryScanner. Default: 10. This parameter makes sense for FastVectorHighlighter only. \item hl.bs.chars Specify the boundary characters, used by SimpleBoundaryScanner. This parameter makes sense for FastVectorHighlighter only. \item hl.bs.type Specify one of CHARACTER, WORD, SENTENCE and LINE, used by BreakIteratorBoundaryScanner. Default: WORD. This parameter makes sense for FastVectorHighlighter only. \item hl.bs.language Specify the language for Locale that is used by BreakIteratorBoundaryScanner. This parameter makes sense for FastVectorHighlighter only. Valid entries take the form of ISO 639-1 strings. \item hl.bs.country Specify the country for Locale that is used by BreakIteratorBoundaryScanner. This parameter makes sense for FastVectorHighlighter only. Valid entries take the form of ISO 3166-1 alpha-2 strings. \item hl.useFastVectorHighlighter Use FastVectorHighlighter. 
FastVectorHighlighter requires that the field has termVectors=on, termPositions=on and termOffsets=on. This parameter accepts per-field overrides. Default: FALSE \item hl.usePhraseHighlighter Use SpanScorer to highlight phrase terms only when they appear within the query phrase in the document. Default: TRUE. \item hl.highlightMultiTerm If the SpanScorer is also being used, enables highlighting for range/wildcard/fuzzy/prefix queries. Default: FALSE. This parameter makes sense for the original Highlighter only. \item hl.regex.slop Factor by which the regex fragmenter can stray from the ideal fragment size (given by hl.fragsize) to accommodate the regular expression. For instance, a slop of 0.2 with fragsize of 100 should yield fragments between 80 and 120 characters in length. It is usually good to provide a slightly smaller fragsize when using the regex fragmenter. Default: 0.6. This parameter makes sense for the original Highlighter only. \item hl.regex.pattern The regular expression for fragmenting. This could be used to extract sentences (see example solrconfig.xml) This parameter makes sense for the original Highlighter only. \item hl.regex.maxAnalyzedChars Only analyze this many characters from a field when using the regex fragmenter (after which, the fragmenter produces fixed-sized fragments). Applying a complicated regex to a huge field is expensive. Default: 10000. This parameter makes sense for the original Highlighter only. \item start Record to start at, default to beginning. \item rows Number of records to return. \item wt (character) Data type returned, defaults to 'json'. One of json or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[XML]{xmlParse}} to parse. csv is only supported in \code{\link{solr_search}} and \code{\link{solr_all}}. \item fl Fields to return \item fq Filter query, this does not affect the search, only what gets returned } } \examples{ \dontrun{ # connect (conn <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) # highlight search solr_highlight(conn, params = list(q='alcohol', hl.fl = 'abstract', rows=10), parsetype = "list") solr_highlight(conn, params = list(q='alcohol', hl.fl = c('abstract','title'), rows=3), parsetype = "list") # Raw data back ## json solr_highlight(conn, params = list(q='alcohol', hl.fl = 'abstract', rows=10), raw=TRUE) ## xml solr_highlight(conn, params = list(q='alcohol', hl.fl = 'abstract', rows=10, wt='xml'), raw=TRUE) ## parse after getting data back out <- solr_highlight(conn, params = list(q='theoretical math', hl.fl = c('abstract','title'), hl.fragsize=30, rows=10, wt='xml'), raw=TRUE) solr_parse(out, parsetype='list') } } \references{ See \url{http://wiki.apache.org/solr/HighlightingParameters} for more information on highlighting. } \seealso{ \code{\link[=solr_search]{solr_search()}}, \code{\link[=solr_facet]{solr_facet()}} } solrium/man/collection_overseerstatus.Rd0000644000176200001440000000217613167507346020331 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_overseerstatus.R \name{collection_overseerstatus} \alias{collection_overseerstatus} \title{Get overseer status} \usage{ collection_overseerstatus(conn, raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. 
See the section Defining core.properties for details on supported properties and values. (https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Returns the current status of the overseer, performance statistics of various overseer APIs as well as the last 10 failures per operation type. } \examples{ \dontrun{ (conn <- SolrClient$new()) conn$collection_overseerstatus() res <- conn$collection_overseerstatus() res$responseHeader res$leader res$overseer_queue_size res$overseer_work_queue_size res$overseer_operations res$collection_operations res$overseer_queue res$overseer_internal_queue res$collection_queue } } solrium/man/solr_search.Rd0000644000176200001440000002107013176420717015322 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_search.r \name{solr_search} \alias{solr_search} \title{Solr search} \usage{ solr_search(conn, name = NULL, params = list(q = "*:*"), body = NULL, callopts = list(), raw = FALSE, parsetype = "df", concat = ",", optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{Name of a collection or core. Or leave as \code{NULL} if not needed.} \item{params}{(list) a named list of parameters, results in a GET request as long as no body parameters given} \item{body}{(list) a named list of parameters, if given a POST request will be performed} \item{callopts}{Call options passed on to [crul::HttpClient]} \item{raw}{(logical) If TRUE, returns raw data in format specified by wt param} \item{parsetype}{(character) One of 'list' or 'df'} \item{concat}{(character) Character to concatenate elements of longer than length 1. Note that this only works reliably when data format is json (wt='json'). The parsing is more complicated in XML format, but you can do that on your own.} \item{optimizeMaxRows}{(logical) If \code{TRUE}, then rows parameter will be adjusted to the number of returned results by the same constraints. It will only be applied if rows parameter is higher than \code{minOptimizedRows}. Default: \code{TRUE}} \item{minOptimizedRows}{(numeric) used by \code{optimizedMaxRows} parameter, the minimum optimized rows. Default: 50000} \item{...}{Further args to be combined into query} } \value{ XML, JSON, a list, or data.frame } \description{ Returns only matched documents, and doesn't return other items, including facets, groups, mlt, stats, and highlights. } \note{ SOLR v1.2 was the first version to support csv. See \url{https://issues.apache.org/jira/browse/SOLR-66} } \section{Parameters}{ \itemize{ \item q Query terms, defaults to '*:*', or everything. \item sort Field to sort on. You can specify ascending (e.g., score asc) or descending (e.g., score desc), sort by two fields (e.g., score desc, price asc), or sort by a function (e.g., sum(x_f, y_f) desc, which sorts by the sum of x_f and y_f in a descending order). \item start Record to start at, default to beginning. \item rows Number of records to return. Default: 10. \item pageDoc If you expect to be paging deeply into the results (say beyond page 10, assuming rows=10) and you are sorting by score, you may wish to add the pageDoc and pageScore parameters to your request. These two parameters tell Solr (and Lucene) what the last result (Lucene internal docid and score) of the previous page was, so that when scoring the query for the next set of pages, it can ignore any results that occur higher than that item. 
To get the Lucene internal doc id, you will need to add [docid] to the &fl list. e.g., q=*:*&start=10&pageDoc=5&pageScore=1.345&fl=[docid],score \item pageScore See pageDoc notes. \item fq Filter query, this does not affect the search, only what gets returned. This parameter can accept multiple items in a list or vector. You can't pass more than one parameter of the same name, so we get around it by passing multiple queries and we parse internally \item fl Fields to return, can be a character vector like \code{c('id', 'title')}, or a single character vector with one or more comma separated names, like \code{'id,title'} \item defType Specify the query parser to use with this request. \item timeAllowed The time allowed for a search to finish. This value only applies to the search and not to requests in general. Time is in milliseconds. Values <= 0 mean no time restriction. Partial results may be returned (if there are any). \item qt Which query handler to use. Options: dismax, others? \item NOW Set a fixed time for evaluating Date-based expressions \item TZ Time zone, you can override the default. \item echoHandler If \code{TRUE}, Solr places the name of the handler used in the response to the client for debugging purposes. Default: \item echoParams The echoParams parameter tells Solr what kinds of Request parameters should be included in the response for debugging purposes, legal values include: \itemize{ \item none - don't include any request parameters for debugging \item explicit - include the parameters explicitly specified by the client in the request \item all - include all parameters involved in this request, either specified explicitly by the client, or implicit because of the request handler configuration. } \item wt (character) One of json, xml, or csv. Data type returned, defaults to 'csv'. If json, uses [jsonlite::fromJSON()] to parse. If xml, uses [xml2::read_xml()] to parse. If csv, uses [read.table()] to parse. 
`wt=csv` gives the fastest performance at least in all the cases we have tested in, thus it's the default value for `wt` } } \examples{ \dontrun{ # Connect to a local Solr instance (cli <- SolrClient$new()) cli$search("gettingstarted", params = list(q = "features:notes")) solr_search(cli, "gettingstarted") solr_search(cli, "gettingstarted", params = list(q = "features:notes")) solr_search(cli, "gettingstarted", body = list(query = "features:notes")) (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) cli$search(params = list(q = "*:*")) cli$search(params = list(q = "title:golgi", fl = c('id', 'title'))) cli$search(params = list(q = "*:*", facet = "true")) # search solr_search(cli, params = list(q='*:*', rows=2, fl='id')) # search and return all rows solr_search(cli, params = list(q='*:*', rows=-1, fl='id')) # Search for word ecology in title and cell in the body solr_search(cli, params = list(q='title:"ecology" AND body:"cell"', fl='title', rows=5)) # Search for word "cell" but not "lines" in the title field solr_search(cli, params = list(q='title:"cell" -title:"lines"', fl='title', rows=5)) # Wildcards ## Search for word that starts with "cell" in the title field solr_search(cli, params = list(q='title:"cell*"', fl='title', rows=5)) # Proximity searching ## Search for words "sports" and "alcohol" within seven words of each other solr_search(cli, params = list(q='everything:"sports alcohol"~7', fl='abstract', rows=3)) # Range searches ## Search for articles with Twitter count between 5 and 50 solr_search(cli, params = list(q='*:*', fl=c('alm_twitterCount','id'), fq='alm_twitterCount:[5 TO 50]', rows=10)) # Boosts ## Assign higher boost to title matches than to body matches ## (compare the two calls) solr_search(cli, params = list(q='title:"cell" abstract:"science"', fl='title', rows=3)) solr_search(cli, params = list(q='title:"cell"^1.5 AND abstract:"science"', fl='title', rows=3)) # FunctionQuery queries ## This kind of query allows you to use the actual values of fields to ## calculate relevancy scores for returned documents ## Here, we search on the product of counter_total_all and alm_twitterCount ## metrics for articles in PLOS Journals solr_search(cli, params = list(q="{!func}product($v1,$v2)", v1 = 'sqrt(counter_total_all)', v2 = 'log(alm_twitterCount)', rows=5, fl=c('id','title'), fq='doc_type:full')) ## here, search on the product of counter_total_all and alm_twitterCount, ## using a new temporary field "_val_" solr_search(cli, params = list(q='_val_:"product(counter_total_all,alm_twitterCount)"', rows=5, fl=c('id','title'), fq='doc_type:full')) ## papers with most citations solr_search(cli, params = list(q='_val_:"max(counter_total_all)"', rows=5, fl=c('id','counter_total_all'), fq='doc_type:full')) ## papers with most tweets solr_search(cli, params = list(q='_val_:"max(alm_twitterCount)"', rows=5, fl=c('id','alm_twitterCount'), fq='doc_type:full')) ## many fq values solr_search(cli, params = list(q="*:*", fl=c('id','alm_twitterCount'), fq=list('doc_type:full','subject:"Social networks"', 'alm_twitterCount:[100 TO 10000]'), sort='counter_total_month desc')) ## using wt = csv solr_search(cli, params = list(q='*:*', rows=50, fl=c('id','score'), fq='doc_type:full', wt="csv")) solr_search(cli, params = list(q='*:*', rows=50, fl=c('id','score'), fq='doc_type:full')) # using a proxy # cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL, # proxy = list(url = "http://186.249.1.146:80")) # solr_search(cli, q='*:*', rows=2, fl='id', 
#   callopts=list(verbose=TRUE)) # Pass on curl options to modify request ## verbose solr_search(cli, params = list(q='*:*', rows=2, fl='id'), callopts = list(verbose=TRUE)) } } \references{ See \url{http://wiki.apache.org/solr/#Search_and_Indexing} for more information. } \seealso{ \code{\link[=solr_highlight]{solr_highlight()}}, \code{\link[=solr_facet]{solr_facet()}} } solrium/man/collapse_pivot_names.Rd0000644000176200001440000000124513141431267017220 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/parsers.R \name{collapse_pivot_names} \alias{collapse_pivot_names} \title{Collapse Pivot Field and Value Columns} \usage{ collapse_pivot_names(data) } \arguments{ \item{data}{a \code{data.frame} with every 2 columns representing a field and value and the final representing a count} } \value{ a \code{data.frame} } \description{ Convert a table consisting of columns in sets of 3 into 2 columns assuming that the first column of every set of 3 (field) is duplicated throughout all rows and should be removed. This type of structure is usually returned by facet.pivot responses. } \keyword{internal} solrium/man/collection_create.Rd0000644000176200001440000001136613176213163016476 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_create.R \name{collection_create} \alias{collection_create} \title{Add a collection} \usage{ collection_create(conn, name, numShards = 1, maxShardsPerNode = 1, createNodeSet = NULL, collection.configName = NULL, replicationFactor = 1, router.name = NULL, shards = NULL, createNodeSet.shuffle = TRUE, router.field = NULL, autoAddReplicas = FALSE, async = NULL, raw = FALSE, callopts = list(), ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the collection to be created. Required} \item{numShards}{(integer) The number of shards to be created as part of the collection. This is a required parameter when using the 'compositeId' router.} \item{maxShardsPerNode}{(integer) When creating collections, the shards and/or replicas are spread across all available (i.e., live) nodes, and two replicas of the same shard will never be on the same node. If a node is not live when the CREATE operation is called, it will not get any parts of the new collection, which could lead to too many replicas being created on a single live node. Defining maxShardsPerNode sets a limit on the number of replicas CREATE will spread to each node. If the entire collection cannot fit into the live nodes, no collection will be created at all. Default: 1} \item{createNodeSet}{(character) Allows defining the nodes to spread the new collection across. If not provided, the CREATE operation will create shard-replica spread across all live Solr nodes. The format is a comma-separated list of node_names, such as localhost:8983_solr, localhost:8984_solr, localhost:8985_solr. Default: \code{NULL}} \item{collection.configName}{(character) Defines the name of the configurations (which must already be stored in ZooKeeper) to use for this collection. If not provided, Solr will default to the collection name as the configuration name.} \item{replicationFactor}{(integer) The number of replicas to be created for each shard. Default: 1} \item{router.name}{(character) The router name that will be used. The router defines how documents will be distributed among the shards. 
The value can be either \code{implicit}, which allows defining the specific shard to assign documents to, or \code{compositeId}, which uses an internal default hash. When using the 'implicit' router, the shards parameter is required. When using the 'compositeId' router, the numShards parameter is required. For more information, see also the section Document Routing. Default: \code{compositeId}} \item{shards}{(character) A comma separated list of shard names, e.g., shard-x,shard-y,shard-z . This is a required parameter when using the 'implicit' router.} \item{createNodeSet.shuffle}{(logical) Controls whether or not the shard-replicas created for this collection will be assigned to the nodes specified by the createNodeSet in a sequential manner, or if the list of nodes should be shuffled prior to creating individual replicas. A 'false' value makes the results of a collection creation predictable and gives more exact control over the location of the individual shard-replicas, but 'true' can be a better choice for ensuring replicas are distributed evenly across nodes. Ignored if createNodeSet is not also specified. Default: \code{TRUE}} \item{router.field}{(character) If this field is specified, the router will look at the value of the field in an input document to compute the hash and identify a shard instead of looking at the uniqueKey field. If the field specified is null in the document, the document will be rejected. Please note that RealTime Get or retrieval by id would also require the parameter \emph{route} (or shard.keys) to avoid a distributed search.} \item{autoAddReplicas}{(logical) When set to true, enables auto addition of replicas on shared file systems. See the section autoAddReplicas Settings for more details on settings and overrides. Default: \code{FALSE}} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} \item{...}{You can pass in parameters like \code{property.name=value} to set core property name to value. See the section Defining core.properties for details on supported properties and values. 
(https://lucene.apache.org/solr/guide/7_0/defining-core-properties.html)} } \description{ Add a collection } \examples{ \dontrun{ # connect (cli <- SolrClient$new()) if (!cli$collection_exists("helloWorld")) { cli$collection_create(name = "helloWorld") } if (!cli$collection_exists("tablesChairs")) { cli$collection_create(name = "tablesChairs") } } } solrium/man/pivot_flatten_tabular.Rd0000644000176200001440000000077313141431267017407 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/parsers.R \name{pivot_flatten_tabular} \alias{pivot_flatten_tabular} \title{Flatten facet.pivot responses} \usage{ pivot_flatten_tabular(df_w_pivot) } \arguments{ \item{df_w_pivot}{a \code{data.frame} with another \code{data.frame} nested inside representing a pivot response} } \value{ a \code{data.frame} } \description{ Convert a nested hierarchy of facet.pivot elements to tabular data (rows and columns) } \keyword{internal} solrium/man/delete.Rd0000644000176200001440000000375513167521442014267 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/delete.R \name{delete} \alias{delete} \alias{delete_by_id} \alias{delete} \alias{delete_by_query} \title{Delete documents by ID or query} \usage{ delete_by_id(conn, ids, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = "json", raw = FALSE, ...) delete_by_query(conn, query, name, commit = TRUE, commit_within = NULL, overwrite = TRUE, boost = NULL, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{ids}{Document IDs, one or more in a vector or list} \item{name}{(character) A collection or core name. Required.} \item{commit}{(logical) If \code{TRUE}, documents immediately searchable. Default: \code{TRUE}} \item{commit_within}{(numeric) Milliseconds to commit the change, the document will be added within that time. Default: \code{NULL}} \item{overwrite}{(logical) Overwrite documents with matching keys. Default: \code{TRUE}} \item{boost}{(numeric) Boost factor. Default: \code{NULL}} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}} to parse. If xml, uses \code{\link[xml2:read_xml]{xml2::read_xml()}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} \item{query}{Query to use to delete documents} } \description{ Delete documents by ID or query } \details{ We use json internally as data interchange format for this function. } \examples{ \dontrun{ (cli <- SolrClient$new()) # add some documents first ss <- list(list(id = 1, price = 100), list(id = 2, price = 500)) cli$add(ss, name = "gettingstarted") # Now, delete them # Delete by ID cli$delete_by_id(ids = 1, "gettingstarted") ## Many IDs cli$delete_by_id(ids = c(3, 4), "gettingstarted") # Delete by query cli$delete_by_query(query = "manu:bank", "gettingstarted") } } solrium/man/collection_delete.Rd0000644000176200001440000000135713176213163016474 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_delete.R \name{collection_delete} \alias{collection_delete} \title{Delete a collection} \usage{ collection_delete(conn, name, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the collection to be deleted. 
Required} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \code{\link[crul]{HttpClient}}} } \description{ Delete a collection } \examples{ \dontrun{ (conn <- SolrClient$new()) if (!conn$collection_exists("helloWorld")) { conn$collection_create(name = "helloWorld") } collection_delete(conn, name = "helloWorld") } } solrium/man/update_atomic_xml.Rd0000644000176200001440000000401113167521443016510 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/update_atomic_xml.R \name{update_atomic_xml} \alias{update_atomic_xml} \title{Atomic updates with XML data} \usage{ update_atomic_xml(conn, body, name, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{body}{(character) XML as a character string} \item{name}{(character) Name of the core or collection} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}} to parse. If xml, uses \code{\link[xml2:read_xml]{xml2::read_xml()}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Atomic updates to parts of Solr documents } \examples{ \dontrun{ # start Solr in Cloud mode: bin/solr start -e cloud -noprompt # connect (conn <- SolrClient$new()) # create a collection if (!conn$collection_exists("books")) { conn$collection_create("books") } # Add documents file <- system.file("examples", "books.xml", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_xml(file, "books") # get a document conn$get(ids = '978-0641723445', "books", wt = "xml") # atomic update body <- '<add> <doc> <field name="id">978-0641723445</field> <field name="genre_s" update="set">mystery</field> <field name="pages_i" update="inc">1</field> </doc> </add>' conn$update_atomic_xml(body, name="books") # get the document again conn$get(ids = '978-0641723445', "books", wt = "xml") # another atomic update body <- '<add> <doc> <field name="id">978-0641723445</field> <field name="price" update="remove">12.5</field> </doc> </add>' conn$update_atomic_xml(body, "books") # get the document again conn$get(ids = '978-0641723445', "books", wt = "xml") } } \references{ \url{https://lucene.apache.org/solr/guide/7_0/updating-parts-of-documents.html} } solrium/man/collection_deleteshard.Rd0000644000176200001440000000260613176214320017510 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_deleteshard.R \name{collection_deleteshard} \alias{collection_deleteshard} \title{Delete a shard} \usage{ collection_deleteshard(conn, name, shard, raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) Required. The name of the collection that includes the shard to be deleted} \item{shard}{(character) Required. The name of the shard to be deleted} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Deleting a shard will unload all replicas of the shard and remove them from clusterstate.json. It will only remove shards that are inactive, or which have no range given for custom sharding. 
} \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("buffalo")) { conn$collection_create(name = "buffalo") # OR: bin/solr create -c buffalo } # find shard names names(conn$collection_clusterstatus()$cluster$collections$buffalo$shards) # split a shard by name collection_splitshard(conn, name = "buffalo", shard = "shard1") # now we have three shards names(conn$collection_clusterstatus()$cluster$collections$buffalo$shards) # delete shard conn$collection_deleteshard(name = "buffalo", shard = "shard1_1") } } solrium/man/collection_addreplicaprop.Rd0000644000176200001440000000412413176211100020205 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_addreplicaprop.R \name{collection_addreplicaprop} \alias{collection_addreplicaprop} \title{Add a replica property} \usage{ collection_addreplicaprop(conn, name, shard, replica, property, property.value, shardUnique = FALSE, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the collection the replica belongs to. Required} \item{shard}{(character) Required. The name of the shard the replica belongs to} \item{replica}{(character) Required. The replica, e.g. core_node1.} \item{property}{(character) Required. The property to add. Note: this will have the literal 'property.' prepended to distinguish it from system-maintained properties. So these two forms are equivalent: \code{property=special} and \code{property=property.special}} \item{property.value}{(character) Required. The value to assign to the property} \item{shardUnique}{(logical) If \code{TRUE}, then setting this property in one replica will remove the property from all other replicas in that shard. Default: \code{FALSE}} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Assign an arbitrary property to a particular replica and give it the value specified. If the property already exists, it will be overwritten with the new value. } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("addrep")) { conn$collection_create(name = "addrep", numShards = 1) # OR bin/solr create -c addrep } # status conn$collection_clusterstatus()$cluster$collections$addrep$shards # add the value world to the property hello conn$collection_addreplicaprop(name = "addrep", shard = "shard1", replica = "core_node1", property = "hello", property.value = "world") # check status conn$collection_clusterstatus()$cluster$collections$addrep$shards conn$collection_clusterstatus()$cluster$collections$addrep$shards$shard1$replicas$core_node1 } } solrium/man/core_rename.Rd0000644000176200001440000000303513167507346015302 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/core_rename.R \name{core_rename} \alias{core_rename} \title{Rename a core} \usage{ core_rename(conn, name, other, async = NULL, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core to be renamed. Required} \item{other}{(character) The new name of the core. 
Required.} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Rename a core } \examples{ \dontrun{ # start Solr with Schemaless mode via the schemaless eg: # bin/solr start -e schemaless # you can create a new core like: bin/solr create -c <corename> # where <corename> is the name for your core - or create as below # connect (conn <- SolrClient$new()) # Status of particular cores path <- "~/solr-7.0.0/server/solr/testcore/conf" dir.create(path, recursive = TRUE) files <- list.files( "~/solr-7.0.0/server/solr/configsets/sample_techproducts_configs/conf/", full.names = TRUE) invisible(file.copy(files, path, recursive = TRUE)) conn$core_create("testcore") # or create in CLI: bin/solr create -c testcore # rename conn$core_rename("testcore", "newtestcore") ## status conn$core_status("testcore") # core missing conn$core_status("newtestcore", FALSE) # not missing # cleanup conn$core_unload("newtestcore") } } solrium/man/solr_stats.Rd0000644000176200001440000000635013176437707015222 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_stats.r \name{solr_stats} \alias{solr_stats} \title{Solr stats} \usage{ solr_stats(conn, name = NULL, params = list(q = "*:*", stats.field = NULL, stats.facet = NULL), body = NULL, callopts = list(), raw = FALSE, parsetype = "df", ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{Name of a collection or core. Or leave as \code{NULL} if not needed.} \item{params}{(list) a named list of parameters, results in a GET request as long as no body parameters given} \item{body}{(list) a named list of parameters, if given a POST request will be performed} \item{callopts}{Call options passed on to [crul::HttpClient]} \item{raw}{(logical) If TRUE, returns raw data in format specified by wt param} \item{parsetype}{(character) One of 'list' or 'df'} \item{...}{Further args to be combined into query} } \value{ XML, JSON, a list, or data.frame } \description{ Returns only stat items } \section{Stats parameters}{ \itemize{ \item q Query terms, defaults to '*:*', or everything. \item stats.field The field or fields to compute statistics for. \item stats.facet A field to facet the statistics by. Note that you cannot facet on multi-valued fields. \item wt (character) Data type returned, defaults to 'json'. One of json or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[XML]{xmlParse}} to parse. csv is only supported in \code{\link{solr_search}} and \code{\link{solr_all}}. \item start Record to start at, default to beginning. \item rows Number of records to return. Defaults to 10. \item key API key, if needed. 
} } \examples{ \dontrun{ # connect (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) # get stats solr_stats(cli, params = list(q='science', stats.field='counter_total_all'), raw=TRUE) solr_stats(cli, params = list(q='title:"ecology" AND body:"cell"', stats.field=c('counter_total_all','alm_twitterCount'))) solr_stats(cli, params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet='journal')) solr_stats(cli, params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume'))) # Get raw data, then parse later if you feel like it ## json out <- solr_stats(cli, params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume')), raw=TRUE) library("jsonlite") jsonlite::fromJSON(out) solr_parse(out) # list solr_parse(out, 'df') # data.frame ## xml out <- solr_stats(cli, params = list(q='ecology', stats.field=c('counter_total_all','alm_twitterCount'), stats.facet=c('journal','volume'), wt="xml"), raw=TRUE) library("xml2") xml2::read_xml(unclass(out)) solr_parse(out) # list solr_parse(out, 'df') # data.frame # Get verbose http call information solr_stats(cli, params = list(q='ecology', stats.field='alm_twitterCount'), callopts=list(verbose=TRUE)) } } \references{ See \url{http://wiki.apache.org/solr/StatsComponent} for more information on Solr stats. } \seealso{ \code{\link[=solr_highlight]{solr_highlight()}}, \code{\link[=solr_facet]{solr_facet()}}, \code{\link[=solr_search]{solr_search()}}, \code{\link[=solr_mlt]{solr_mlt()}} } solrium/man/collection_splitshard.Rd0000644000176200001440000000261213176243013017377 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/collection_splitshard.R \name{collection_splitshard} \alias{collection_splitshard} \title{Split a shard} \usage{ collection_splitshard(conn, name, shard, ranges = NULL, split.key = NULL, async = NULL, raw = FALSE, callopts = list()) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the collection that includes the shard to be split. Required} \item{shard}{(character) Required. The name of the shard to be split} \item{ranges}{(character) A comma-separated list of hash ranges in hexadecimal e.g. ranges=0-1f4,1f5-3e8,3e9-5dc} \item{split.key}{(character) The key to use for splitting the index} \item{async}{(character) Request ID to track this action which will be processed asynchronously} \item{raw}{(logical) If \code{TRUE}, returns raw data} \item{callopts}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Split a shard } \examples{ \dontrun{ (conn <- SolrClient$new()) # create collection if (!conn$collection_exists("trees")) { conn$collection_create("trees") } # find shard names names(conn$collection_clusterstatus()$cluster$collections$trees$shards) # split a shard by name conn$collection_splitshard(name = "trees", shard = "shard1") # now we have three shards names(conn$collection_clusterstatus()$cluster$collections$trees$shards) } } solrium/man/config_set.Rd0000644000176200001440000000272013167507346015143 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/config_set.R \name{config_set} \alias{config_set} \title{Set Solr configuration details} \usage{ config_set(conn, name, set = NULL, unset = NULL, ...) 
} \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) The name of the core. If not given, all cores.} \item{set}{(list) List of key:value pairs of what to set. Default: NULL (nothing passed)} \item{unset}{(list) One or more character strings of keys to unset. Default: NULL (nothing passed)} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ A list with response from server } \description{ Set Solr configuration details } \examples{ \dontrun{ # start Solr with Cloud mode via the cloud eg: bin/solr start -e cloud # you can create a new core like: bin/solr create -c <corename> # where <corename> is the name for your core - or create as below # connect (conn <- SolrClient$new()) # set a property conn$config_set("gettingstarted", set = list(query.filterCache.autowarmCount = 1000)) # unset a property conn$config_set("gettingstarted", unset = "query.filterCache.size", verbose = TRUE) # unset another property conn$config_set("gettingstarted", unset = "enableLazyFieldLoading") # many properties conn$config_set("gettingstarted", set = list( query.filterCache.autowarmCount = 1000, query.commitWithin.softCommit = 'false' ) ) } } solrium/man/update_atomic_json.Rd0000644000176200001440000000350613167521443016667 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/update_atomic_json.R \name{update_atomic_json} \alias{update_atomic_json} \title{Atomic updates with JSON data} \usage{ update_atomic_json(conn, body, name, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{body}{(character) JSON as a character string} \item{name}{(character) Name of the core or collection} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}} to parse. 
If xml, uses \code{\link[xml2:read_xml]{xml2::read_xml()}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Atomic updates to parts of Solr documents } \examples{ \dontrun{ # start Solr in Cloud mode: bin/solr start -e cloud -noprompt # connect (conn <- SolrClient$new()) # create a collection if (!conn$collection_exists("books")) { conn$collection_create("books") } # Add documents file <- system.file("examples", "books2.json", package = "solrium") cat(readLines(file), sep = "\\n") conn$update_json(file, "books") # get a document conn$get(ids = 343334534545, "books") # atomic update body <- '[{ "id": "343334534545", "genre_s": {"set": "mystery" }, "pages_i": {"inc": 1 } }]' conn$update_atomic_json(body, "books") # get the document again conn$get(ids = 343334534545, "books") # another atomic update body <- '[{ "id": "343334534545", "price": {"remove": "12.5" } }]' conn$update_atomic_json(body, "books") # get the document again conn$get(ids = 343334534545, "books") } } \references{ \url{https://lucene.apache.org/solr/guide/7_0/updating-parts-of-documents.html} } solrium/man/solr_all.Rd0000644000176200001440000001425513176421234014627 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/solr_all.r \name{solr_all} \alias{solr_all} \title{All purpose search} \usage{ solr_all(conn, name = NULL, params = NULL, body = NULL, callopts = list(), raw = FALSE, parsetype = "df", concat = ",", optimizeMaxRows = TRUE, minOptimizedRows = 50000L, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{Name of a collection or core. Or leave as \code{NULL} if not needed.} \item{params}{(list) a named list of parameters, results in a GET request as long as no body parameters given} \item{body}{(list) a named list of parameters, if given a POST request will be performed} \item{callopts}{Call options passed on to [crul::HttpClient]} \item{raw}{(logical) If TRUE, returns raw data in format specified by wt param} \item{parsetype}{(character) One of 'list' or 'df'} \item{concat}{(character) Character to concatenate elements of longer than length 1. Note that this only works reliably when data format is json (wt='json'). The parsing is more complicated in XML format, but you can do that on your own.} \item{optimizeMaxRows}{(logical) If \code{TRUE}, then rows parameter will be adjusted to the number of returned results by the same constraints. It will only be applied if rows parameter is higher than \code{minOptimizedRows}. Default: \code{TRUE}} \item{minOptimizedRows}{(numeric) used by \code{optimizedMaxRows} parameter, the minimum optimized rows. Default: 50000} \item{...}{Further args to be combined into query} } \value{ XML, JSON, a list, or data.frame } \description{ Includes documents, facets, groups, mlt, stats, and highlights } \section{Parameters}{ \itemize{ \item q Query terms, defaults to '*:*', or everything. \item sort Field to sort on. You can specify ascending (e.g., score asc) or descending (e.g., score desc), sort by two fields (e.g., score desc, price asc), or sort by a function (e.g., sum(x_f, y_f) desc, which sorts by the sum of x_f and y_f in a descending order). \item start Record to start at, default to beginning. \item rows Number of records to return. Default: 10. 
\item pageDoc If you expect to be paging deeply into the results (say beyond page 10, assuming rows=10) and you are sorting by score, you may wish to add the pageDoc and pageScore parameters to your request. These two parameters tell Solr (and Lucene) what the last result (Lucene internal docid and score) of the previous page was, so that when scoring the query for the next set of pages, it can ignore any results that occur higher than that item. To get the Lucene internal doc id, you will need to add [docid] to the &fl list. e.g., q=*:*&start=10&pageDoc=5&pageScore=1.345&fl=[docid],score \item pageScore See pageDoc notes. \item fq Filter query, this does not affect the search, only what gets returned. This parameter can accept multiple items in a list or vector. You can't pass more than one parameter of the same name, so we get around it by passing multiple queries and we parse internally \item fl Fields to return, can be a character vector like \code{c('id', 'title')}, or a single character vector with one or more comma separated names, like \code{'id,title'} \item defType Specify the query parser to use with this request. \item timeAllowed The time allowed for a search to finish. This value only applies to the search and not to requests in general. Time is in milliseconds. Values <= 0 mean no time restriction. Partial results may be returned (if there are any). \item qt Which query handler to use. Options: dismax, others? \item NOW Set a fixed time for evaluating Date-based expressions \item TZ Time zone, you can override the default. \item echoHandler If \code{TRUE}, Solr places the name of the handler used in the response to the client for debugging purposes. Default: \item echoParams The echoParams parameter tells Solr what kinds of Request parameters should be included in the response for debugging purposes, legal values include: \itemize{ \item none - don't include any request parameters for debugging \item explicit - include the parameters explicitly specified by the client in the request \item all - include all parameters involved in this request, either specified explicitly by the client, or implicit because of the request handler configuration. } \item wt (character) One of json, xml, or csv. Data type returned, defaults to 'csv'. If json, uses [jsonlite::fromJSON()] to parse. If xml, uses [xml2::read_xml()] to parse. If csv, uses [read.table()] to parse. 
`wt=csv` gives the fastest performance at least in all the cases we have tested in, thus it's the default value for `wt` } } \examples{ \dontrun{ # connect (cli <- SolrClient$new(host = "api.plos.org", path = "search", port = NULL)) solr_all(cli, params = list(q='*:*', rows=2, fl='id')) # facets solr_all(cli, params = list(q='*:*', rows=2, fl='id', facet="true", facet.field="journal")) # mlt solr_all(cli, params = list(q='ecology', rows=2, fl='id', mlt='true', mlt.count=2, mlt.fl='abstract')) # facets and mlt solr_all(cli, params = list(q='ecology', rows=2, fl='id', facet="true", facet.field="journal", mlt='true', mlt.count=2, mlt.fl='abstract')) # stats solr_all(cli, params = list(q='ecology', rows=2, fl='id', stats='true', stats.field='counter_total_all')) # facets, mlt, and stats solr_all(cli, params = list(q='ecology', rows=2, fl='id', facet="true", facet.field="journal", mlt='true', mlt.count=2, mlt.fl='abstract', stats='true', stats.field='counter_total_all')) # group solr_all(cli, params = list(q='ecology', rows=2, fl='id', group='true', group.field='journal', group.limit=3)) # facets, mlt, stats, and groups solr_all(cli, params = list(q='ecology', rows=2, fl='id', facet="true", facet.field="journal", mlt='true', mlt.count=2, mlt.fl='abstract', stats='true', stats.field='counter_total_all', group='true', group.field='journal', group.limit=3)) # using wt = xml solr_all(cli, params = list(q='*:*', rows=50, fl=c('id','score'), fq='doc_type:full', wt="xml"), raw=TRUE) } } \references{ See \url{http://wiki.apache.org/solr/#Search_and_Indexing} for more information. } \seealso{ \code{\link[=solr_highlight]{solr_highlight()}}, \code{\link[=solr_facet]{solr_facet()}} } solrium/man/update_csv.Rd0000644000176200001440000001305313176206024015146 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/update_csv.R \name{update_csv} \alias{update_csv} \title{Update documents with CSV data} \usage{ update_csv(conn, files, name, separator = ",", header = TRUE, fieldnames = NULL, skip = NULL, skipLines = 0, trim = FALSE, encapsulator = NULL, escape = NULL, keepEmpty = FALSE, literal = NULL, map = NULL, split = NULL, rowid = NULL, rowidOffset = NULL, overwrite = NULL, commit = NULL, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{files}{Path to a single file to load into Solr} \item{name}{(character) Name of the core or collection} \item{separator}{Specifies the character to act as the field separator. Default: ','} \item{header}{TRUE if the first line of the CSV input contains field or column names. Default: \code{TRUE}. If the fieldnames parameter is absent, these field names will be used when adding documents to the index.} \item{fieldnames}{Specifies a comma separated list of field names to use when adding documents to the Solr index. If the CSV input already has a header, the names specified by this parameter will override them. Example: fieldnames=id,name,category} \item{skip}{A comma separated list of field names to skip in the input. An alternate way to skip a field is to specify its name as a zero length string in fieldnames. For example, \code{fieldnames=id,name,category&skip=name} skips the name field, and is equivalent to \code{fieldnames=id,,category}} \item{skipLines}{Specifies the number of lines in the input stream to discard before the CSV data starts (including the header, if present). Default: \code{0}} \item{trim}{If true, remove leading and trailing whitespace from values. 
CSV parsing already ignores leading whitespace by default, but there may be trailing whitespace, or there may be leading whitespace that is encapsulated by quotes and is thus not removed. This may be specified globally, or on a per-field basis. Default: \code{FALSE}} \item{encapsulator}{The character optionally used to surround values to preserve characters such as the CSV separator or whitespace. This standard CSV format handles the encapsulator itself appearing in an encapsulated value by doubling the encapsulator.} \item{escape}{The character used for escaping CSV separators or other reserved characters. If an escape is specified, the encapsulator is not used unless also explicitly specified since most formats use either encapsulation or escaping, not both.} \item{keepEmpty}{Keep and index empty (zero length) field values. This may be specified globally, or on a per-field basis. Default: \code{FALSE}} \item{literal}{Adds fixed field name/value to all documents. Example: Adds a "datasource" field with value equal to "products" for every document indexed from the CSV \code{literal.datasource=products}} \item{map}{Specifies a mapping between one value and another. The string on the LHS of the colon will be replaced with the string on the RHS. This parameter can be specified globally or on a per-field basis. Example: replaces "Absolutely" with "true" in every field \code{map=Absolutely:true}. Example: removes any values of "RemoveMe" in the field "foo" \code{f.foo.map=RemoveMe:&f.foo.keepEmpty=false }} \item{split}{If TRUE, the field value is split into multiple values by another CSV parser. The CSV parsing rules such as separator and encapsulator may be specified as field parameters. See \url{https://wiki.apache.org/solr/UpdateCSV#split} for examples.} \item{rowid}{If not null, add a new field to the document where the passed in parameter name is the field name to be added and the current line/rowid is the value. This is useful if your CSV doesn't have a unique id already in it and you want to use the line number as one. Also useful if you simply want to index where exactly in the original CSV file the row came from} \item{rowidOffset}{In conjunction with the rowid parameter, this integer value will be added to the rowid before adding it to the field.} \item{overwrite}{If true (the default), check for and overwrite duplicate documents, based on the uniqueKey field declared in the solr schema. If you know the documents you are indexing do not contain any duplicates then you may see a considerable speed up with &overwrite=false.} \item{commit}{Commit changes after all records in this request have been indexed. The default is commit=false to avoid the potential performance impact of frequent commits.} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite]{fromJSON}} to parse. If xml, uses \code{\link[xml2]{read_xml}} to parse} \item{raw}{(logical) If TRUE, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \description{ Update documents with CSV data } \note{ SOLR v1.2 was the first version to support csv. 
See \url{https://issues.apache.org/jira/browse/SOLR-66} } \examples{ \dontrun{ # start Solr: bin/solr start -f -c -p 8983 # connect (cli <- SolrClient$new()) if (!cli$collection_exists("helloWorld")) { cli$collection_create(name = "helloWorld", numShards = 2) } df <- data.frame(id=1:3, name=c('red', 'blue', 'green')) write.csv(df, file="df.csv", row.names=FALSE, quote = FALSE) cli$update_csv("df.csv", "helloWorld", verbose = TRUE) # give back raw xml cli$update_csv("df.csv", "helloWorld", wt = "xml") ## raw json cli$update_csv("df.csv", "helloWorld", wt = "json", raw = TRUE) } } \seealso{ Other update: \code{\link{update_json}}, \code{\link{update_xml}} } solrium/man/makemultiargs.Rd0000644000176200001440000000060213167507346015665 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/zzz.r \name{makemultiargs} \alias{makemultiargs} \title{Function to make multiple args of the same name from a single input with length > 1} \usage{ makemultiargs(x) } \arguments{ \item{x}{Value} } \description{ Function to make multiple args of the same name from a single input with length > 1 } solrium/man/ping.Rd0000644000176200001440000000306613167507346013764 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ping.R \name{ping} \alias{ping} \title{Ping a Solr instance} \usage{ ping(conn, name, wt = "json", raw = FALSE, ...) } \arguments{ \item{conn}{A solrium connection object, see \link{SolrClient}} \item{name}{(character) Name of a collection or core. Required.} \item{wt}{(character) One of json (default) or xml. If json, uses \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}} to parse. If xml, uses \code{\link[xml2:read_xml]{xml2::read_xml()}} to parse} \item{raw}{(logical) If \code{TRUE}, returns raw data in format specified by \code{wt} param} \item{...}{curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} } \value{ if \code{wt="xml"} an object of class \code{xml_document}, if \code{wt="json"} an object of class \code{list} } \description{ Ping a Solr instance } \details{ You may not be able to run this function against many public Solr services as they hopefully don't expose their admin interface to the public, but it works locally. } \examples{ \dontrun{ # start Solr, in your CLI, run: `bin/solr start -e cloud -noprompt` # after that, if you haven't run `bin/post -c gettingstarted docs/` yet, # do so # connect: by default we connect to localhost, port 8983 (cli <- SolrClient$new()) # ping the gettingstarted index cli$ping("gettingstarted") ping(cli, "gettingstarted") ping(cli, "gettingstarted", wt = "xml") ping(cli, "gettingstarted", verbose = FALSE) ping(cli, "gettingstarted", raw = TRUE) ping(cli, "gettingstarted", wt="xml", verbose = TRUE) } } solrium/LICENSE0000644000176200001440000000005713044434464012761 0ustar liggesusersYEAR: 2017 COPYRIGHT HOLDER: Scott Chamberlain