jsonlite/ 0000755 0001762 0000144 00000000000 12626264003 012102 5 ustar ligges users jsonlite/inst/ 0000755 0001762 0000144 00000000000 12540777273 013074 5 ustar ligges users jsonlite/inst/CITATION 0000644 0001762 0000144 00000001132 12540777273 014226 0 ustar ligges users citHeader("To cite jsonlite in publications use:") citEntry(entry = "Article", title = "The jsonlite Package: A Practical and Consistent Mapping Between JSON Data and R Objects", author = personList(as.person("Jeroen Ooms")), journal = "arXiv:1403.2805 [stat.CO]", year = "2014", url = "http://arxiv.org/abs/1403.2805", textVersion = paste("Jeroen Ooms (2014).", "The jsonlite Package: A Practical and Consistent Mapping Between JSON Data and R Objects.", "arXiv:1403.2805 [stat.CO]", "URL http://arxiv.org/abs/1403.2805.") ) jsonlite/inst/tests/ 0000755 0001762 0000144 00000000000 12540777273 014236 5 ustar ligges users jsonlite/inst/tests/test-libjson-utf8.R 0000644 0001762 0000144 00000002434 12540777273 017665 0 ustar ligges users context("libjson UTF-8 characters") # Some notes: JSON defines UTF-8 as the default charset. Therefore all encoders and # decoders are required to support UTF-8. JSON also allows for escaped unicode, i.e # \u00F8 however this is mostly for legacy purposes. Using actual UTF-8 characters # is easier and more efficient. 
test_that("test that non ascii characters are ok", { #create random strings objects <- list( "Zürich", "北京填鴨们", "ผัดไทย", "寿司", c("寿司", "Zürich", "foo") ); lapply(objects, function(x){ Encoding(x) <- "UTF-8" myjson <- toJSON(x, pretty=TRUE); expect_that(validate(myjson), is_true()); expect_that(fromJSON(myjson), equals(x)); #prettify needs to parse + output prettyjson <- prettify(myjson); expect_that(validate(prettyjson), is_true()); expect_that(fromJSON(prettyjson), equals(x)); }); #Test escaped unicode characters expect_that(fromJSON('["Z\\u00FCrich"]'), equals("Z\u00fcrich")); expect_that(fromJSON(prettify('["Z\\u00FCrich"]')), equals("Z\u00fcrich")); expect_that(length(unique(fromJSON('["Z\\u00FCrich", "Z\u00fcrich"]'))), equals(1L)) expect_that(fromJSON('["\\u586B"]'), equals("\u586b")); expect_that(fromJSON(prettify('["\\u586B"]')), equals("\u586B")); }); jsonlite/inst/tests/test-toJSON-keep-vec-names.R 0000644 0001762 0000144 00000002575 12540777273 021261 0 ustar ligges users context("toJSON keep_vec_names") test_that("keep_vec_names with named vectors", { # Basic types should give messages # Length-1 vectors expect_message(expect_equal(toJSON2(c(a=1)), '{"a":1}')) expect_message(expect_equal(toJSON2(c(a="x")), '{"a":"x"}')) expect_message(expect_equal(toJSON2(c(a=TRUE)), '{"a":true}')) # Longer vectors expect_message(expect_equal(toJSON2(c(a=1,b=2)), '{"a":1,"b":2}')) expect_message(expect_equal(toJSON2(c(a="x",b="y")), '{"a":"x","b":"y"}')) expect_message(expect_equal(toJSON2(c(a=FALSE,b=TRUE)), '{"a":false,"b":true}')) # Some other types expect_message(expect_equal(toJSON2(factor(c(a="x"))), '{"a":"x"}')) expect_message(expect_equal(toJSON2(c(a=as.Date("2015-01-01"))), '{"a":"2015-01-01"}')) expect_message(expect_equal(toJSON2(c(a=as.POSIXct("2015-01-01 3:00:00"))), '{"a":"2015-01-01 03:00:00"}')) expect_message(expect_equal(toJSON2(c(a=as.POSIXlt("2015-01-01 3:00:00"))), '{"a":"2015-01-01 03:00:00"}')) # keep_vec_names shouldn't affect unnamed vectors 
expect_equal(toJSON2(1), '1') expect_equal(toJSON2(c(1:3)), '[1,2,3]') }) # Data frames generally don't allow named columns, except in very unusual cases test_that("keep_vec_names with data frames", { expect_equal(toJSON3(data.frame(x=c(a=1), y=2)), '{"x":[1],"y":[2]}') expect_equal(toJSON3(data.frame(x=c(a=1,b=2), y=c(c=3,d=4))), '{"x":[1,2],"y":[3,4]}') }) jsonlite/inst/tests/test-toJSON-NA-values.R 0000644 0001762 0000144 00000001005 12540777273 020237 0 ustar ligges users context("toJSON NA values") test_that("Test NA values", { options(stringsAsFactors=FALSE) x <- list(foo=c(TRUE, NA, FALSE, TRUE), bar=c(3.14,NA, 42, NA), zoo=c(NA, "bla", "boe", NA)) x$mydf <- data.frame(col1=c(FALSE, NA, NA, TRUE), col2=c(1.23, NA, 23, NA)) x$mydf$mylist <- list(c(TRUE, NA, FALSE, NA), NA, c("blabla", NA), c(NA,12,13,NA,NA,NA,1001)) expect_that(validate(toJSON(x)), is_true()) expect_that(fromJSON(toJSON(x)), equals(x)) expect_that(fromJSON(toJSON(x, na="null")), equals(x)) }); jsonlite/inst/tests/test-libjson-escaping.R 0000644 0001762 0000144 00000001547 12540777273 020574 0 ustar ligges users context("libjson Escaping") test_that("escaping and parsing of special characters", { #create random strings mychars <- c('a', 'b', " ", '"', "\\", "\t", "\n", "'", "/", "#", "$"); createstring <- function(length){ paste(mychars[ceiling(runif(length, 0, length(mychars)))], collapse="") } #generate 1000 random strings for(i in 1:200){ x <- createstring(i); expect_that(x, equals(fromJSON(toJSON(x)))); expect_that(x, equals(fromJSON(toJSON(x, pretty=TRUE)))); y <- setNames(list(123), x) expect_that(x, equals(fromJSON(toJSON(x, pretty=TRUE)))); } }); test_that("filter invalid escape characters", { #The \v and \a characters are not supported by JSON. This is a common bug #expect_that(validate(toJSON("foo\v\bar\abaz")), is_true()); #Update: yajl doesn't support \v and \a characters at all. Dropping this test. 
}); jsonlite/inst/tests/test-serializeJSON-types.R 0000644 0001762 0000144 00000002045 12540777273 021162 0 ustar ligges users #test serializeJSON context("Serializing Data Types") # Note about numeric precision # In the unit tests we use digits=10. Lowever values will result in problems for some datasets test_that("Serializing Data Objects", { objects <- list( NULL, readBin(system.file(package="base", "Meta/package.rds"), "raw", 999), c(TRUE, FALSE, NA, FALSE), c(1L, NA, 9999999), c(round(pi, 4), NA, NaN, Inf, -Inf), c("foo", NA, "bar"), complex(real=1:10, imaginary=1001:1010), Reaction ~ Days + (1|Subject) + (0+Days|Subject), as.name("cars"), as.pairlist(mtcars), quote(rnorm(10)), expression("to be or not to be"), expression(foo), parse(text="rnorm(10);"), call("rnorm", n=10), emptyenv(), `if`, #builtin `list`, #special getNamespace("graphics") #namespace ) #test all but list lapply(objects, function(object){ expect_that(unserializeJSON(serializeJSON(object)), equals(object)) }); #test all in list expect_that(unserializeJSON(serializeJSON(objects)), equals(objects)) }); jsonlite/inst/tests/test-fromJSON-dataframe.R 0000644 0001762 0000144 00000003611 12540777273 020716 0 ustar ligges users context("fromJSON dataframes") options(stringsAsFactors=FALSE); test_that("recover nested data frames", { x1 <- x2 <- x3 <- x4 <- x5 <- x6 <- data.frame(foo=c(1:2)); x2$bar <- c("jeroen", "eli"); x3$bar <- x4$bar <- x5$bar <- x6$bar <- data.frame(name=c("jeroen", "eli")) x4$bar$age <- x5$bar$age <- c(28, 24); x6$bar$age <- c(28, NA); x5$bar$food <- data.frame(yum=c("Rice", "Pasta")); x6$bar$food <- data.frame(yum=c(NA, "Pasta")); #add to list objects <- list(x1, x2, x3, x4, x5, x6) #test all but list lapply(objects, function(object){ expect_that(fromJSON(toJSON(object)), equals(object)) expect_that(fromJSON(toJSON(object, na="null")), equals(object)) expect_that(names(fromJSON(toJSON(object), flatten = TRUE)), equals(names(unlist(object[1,,drop=FALSE])))) }); #test all in list 
expect_that(fromJSON(toJSON(objects)), equals(objects)) }); test_that("recover lists in data frames", { x <- data.frame(author = c("Homer", "Virgil", "Jeroen")); x$poems = list(c("Iliad", "Odyssey"), c("Eclogues", "Georgics", "Aeneid"), character()); y <- data.frame(author = c("Homer", "Virgil", "Jeroen")); y$poems = list( data.frame(title=c("Iliad", "Odyssey"), year=c(-1194, -800)), data.frame(title=c("Eclogues", "Georgics", "Aeneid"), year=c(-44, -29, -19)), data.frame() ); z <- list(x=x, y=y); zz <- list(x,y); expect_that(fromJSON(toJSON(x)), equals(x)) expect_that(fromJSON(toJSON(y)), equals(y)) expect_that(fromJSON(toJSON(z)), equals(z)) expect_that(fromJSON(toJSON(zz)), equals(zz)) }); #note: nested matrix does not perfectly restore test_that("nested matrix in data frame", { x <- data.frame(foo=1:2) x$bar <- matrix(c(1:5, NA), 2) expect_that(validate(toJSON(x)), is_true()) y <- fromJSON(toJSON(x)) expect_that(y, is_a("data.frame")) expect_that(names(x), equals(names(y))) expect_that(length(y[[1,"bar"]]), equals(3)) }); jsonlite/inst/tests/test-toJSON-Date.R 0000644 0001762 0000144 00000002310 12540777273 017321 0 ustar ligges users context("toJSON Date") object <- as.Date("1985-06-18"); test_that("Encoding Date Objects", { expect_that(toJSON(object), equals("[\"1985-06-18\"]")); expect_that(toJSON(object, Date="ISO8601"), equals("[\"1985-06-18\"]")); expect_that(toJSON(object, Date="epoch"), equals("[5647]")); expect_that(toJSON(object, Date="adsfdsfds"), throws_error("should be one of")); }); test_that("Encoding Date Objects in a list", { expect_that(toJSON(list(foo=object)), equals("{\"foo\":[\"1985-06-18\"]}")); expect_that(toJSON(list(foo=object), Date="ISO8601"), equals("{\"foo\":[\"1985-06-18\"]}")); expect_that(toJSON(list(foo=object), Date="epoch"), equals("{\"foo\":[5647]}")); expect_that(toJSON(list(foo=object), Date="adsfdsfds"), throws_error("should be one of")); }); test_that("Encoding Date Objects in a Data frame", { 
expect_that(toJSON(data.frame(foo=object)), equals("[{\"foo\":\"1985-06-18\"}]")); expect_that(toJSON(data.frame(foo=object), Date="ISO8601"), equals("[{\"foo\":\"1985-06-18\"}]")); expect_that(toJSON(data.frame(foo=object), Date="epoch"), equals("[{\"foo\":5647}]")); expect_that(toJSON(data.frame(foo=object), Date="adsfdsfds"), throws_error("should be one of")); }); jsonlite/inst/tests/helper-toJSON.R 0000644 0001762 0000144 00000000421 12540777273 016747 0 ustar ligges users toJSON <- function(...){ unclass(minify(jsonlite::toJSON(...))) } toJSON2 <- function(x) { toJSON(x, keep_vec_names = TRUE, auto_unbox = TRUE) } toJSON3 <- function(x) { toJSON(x, keep_vec_names = TRUE, auto_unbox = TRUE, dataframe = "columns", rownames = FALSE) } jsonlite/inst/tests/test-fromJSON-array.R 0000644 0001762 0000144 00000003030 12540777273 020103 0 ustar ligges users context("fromJSON Array") test_that("fromJSON Array, row major", { # test high dimensional arrays lapply(2:5, function(n){ object <- array(1:prod(n), dim=1:n) newobject <- fromJSON(toJSON(object)); expect_that(object, equals(newobject)); }); # adding some flat dimensions lapply(1:5, function(n){ object <- array(1:prod(n), dim=c(1:n, 1)) newobject <- fromJSON(toJSON(object)); expect_that(object, equals(newobject)); }); }); test_that("fromJSON Array, column major", { # test high dimensional arrays lapply(2:5, function(n){ object <- array(1:prod(n), dim=1:n) newobject <- fromJSON(toJSON(object, matrix="columnmajor"), columnmajor=TRUE); expect_that(object, equals(newobject)); }); # adding some flat dimensions lapply(1:5, function(n){ object <- array(1:prod(n), dim=c(1:n, 1)) newobject <- fromJSON(toJSON(object, matrix="columnmajor"), columnmajor=TRUE); expect_that(object, equals(newobject)); }); }); test_that("fromJSON Array, character strings", { # test high dimensional arrays lapply(2:5, function(n){ object <- array(paste("cell", 1:prod(n)), dim=1:n) newobject <- fromJSON(toJSON(object, matrix="columnmajor"), 
columnmajor=TRUE); expect_that(object, equals(newobject)); }); # adding some flat dimensions lapply(1:5, function(n){ object <- array(paste("cell", 1:prod(n)), dim=c(1:n, 1)) newobject <- fromJSON(toJSON(object, matrix="columnmajor"), columnmajor=TRUE); expect_that(object, equals(newobject)); }); }); jsonlite/inst/tests/test-serializeJSON-functions.R 0000644 0001762 0000144 00000001154 12540777273 022026 0 ustar ligges users #test serializeJSON context("Serializing Functions") # Note about numeric precision # In the unit tests we use digits=10. Lowever values will result in problems for some datasets test_that("Serializing Functions", { options(keep.source=FALSE); objects <- list( function(x = 0) { x + 1 }, function(x) { x + 1 }, function(x, ...) { x + 1}, lm ); #test all but list lapply(objects, function(object){ expect_that(unserializeJSON(serializeJSON(object)), equals(object)) }); #test all in list expect_that(unserializeJSON(serializeJSON(objects)), equals(objects)) }); jsonlite/inst/tests/test-toJSON-factor.R 0000644 0001762 0000144 00000000573 12540777273 017733 0 ustar ligges users context("toJSON Factor") test_that("Encoding Factor Objects", { expect_that(fromJSON(toJSON(iris$Species)), is_identical_to(as.character(iris$Species))); expect_that(fromJSON(toJSON(iris$Species[1])), is_identical_to(as.character(iris$Species[1]))); expect_that(fromJSON(toJSON(iris$Species, factor="integer")), equals(structure(unclass(iris$Species), levels=NULL))); }); jsonlite/inst/tests/test-toJSON-logical.R 0000644 0001762 0000144 00000002145 12540777273 020064 0 ustar ligges users context("toJSON Logical") test_that("Encoding Logical", { expect_that(toJSON(TRUE), equals("[true]")); expect_that(toJSON(FALSE), equals("[false]")); expect_that(toJSON(as.logical(NA)), equals("[null]")) expect_that(toJSON(as.logical(NA), na="string"), equals("[\"NA\"]")) expect_that(toJSON(c(TRUE, NA, FALSE)), equals("[true,null,false]")); expect_that(toJSON(c(TRUE, NA, FALSE), na="string"), 
equals("[true,\"NA\",false]")); expect_that(toJSON(logical()), equals("[]")); }); test_that("Encoding Logical in Data Frame", { expect_that(toJSON(data.frame(foo=TRUE)), equals("[{\"foo\":true}]")); expect_that(toJSON(data.frame(foo=FALSE)), equals("[{\"foo\":false}]")); expect_that(toJSON(data.frame(foo=as.logical(NA))), equals("[{}]")); expect_that(toJSON(data.frame(foo=as.logical(NA)), na="null"), equals("[{\"foo\":null}]")); expect_that(toJSON(data.frame(foo=as.logical(NA)), na="string"), equals("[{\"foo\":\"NA\"}]")); expect_that(toJSON(data.frame(foo=c(TRUE, NA, FALSE))), equals("[{\"foo\":true},{},{\"foo\":false}]")); expect_that(toJSON(data.frame(foo=logical())), equals("[]")); }); jsonlite/inst/tests/flatten.R 0000644 0001762 0000144 00000000447 12540777273 016023 0 ustar ligges users context("flatten") test_that("flattening", { x <- list(test = data.frame(foo=1:3)) x$test$bar <- data.frame(x=5:3, y=7:9) expect_that(x, equals(fromJSON(toJSON(x), flatten = FALSE))); expect_that(names(fromJSON(toJSON(x), flatten = TRUE)$test), equals(c("foo", "bar.x", "bar.y"))) }); jsonlite/inst/tests/test-serializeJSON-datasets.R 0000644 0001762 0000144 00000000732 12540777273 021627 0 ustar ligges users #test serializeJSON context("Serializing Datasets") # Note about numeric precision # In the unit tests we use digits=10. 
Lowever values will result in problems for some datasets test_that("Serializing datasets", { library(datasets); lapply(as.list(ls("package:datasets")), function(x){ mycall <- call("expect_that", call("unserializeJSON", call("serializeJSON", as.name(x), digits=10)), call("equals", as.name(x)) ); eval(mycall) }); }); jsonlite/inst/tests/test-toJSON-numeric.R 0000644 0001762 0000144 00000002654 12540777273 020121 0 ustar ligges users context("toJSON Numeric") test_that("Encoding Numbers", { expect_that(toJSON(35), equals("[35]")); expect_that(toJSON(35L), equals("[35]")); expect_that(toJSON(c(35, pi), digits=5), equals("[35,3.14159]")); expect_that(toJSON(pi, digits=0), equals("[3]")); expect_that(toJSON(pi, digits=2), equals("[3.14]")); expect_that(toJSON(pi, digits=10), equals("[3.1415926536]")); expect_that(toJSON(c(pi, NA), na="string", digits=5), equals("[3.14159,\"NA\"]")); expect_that(toJSON(c(pi, NA), na="null", digits=5), equals("[3.14159,null]")); }); test_that("Encoding Numbers in Data Frame", { expect_that(toJSON(data.frame(foo=35)), equals("[{\"foo\":35}]")); expect_that(toJSON(data.frame(foo=35L)), equals("[{\"foo\":35}]")); expect_that(toJSON(data.frame(foo=c(35, pi)), digits=5), equals("[{\"foo\":35},{\"foo\":3.14159}]")); expect_that(toJSON(data.frame(foo=pi), digits=0), equals("[{\"foo\":3}]")); expect_that(toJSON(data.frame(foo=pi), digits=2), equals("[{\"foo\":3.14}]")); expect_that(toJSON(data.frame(foo=pi), digits=10), equals("[{\"foo\":3.1415926536}]")); expect_that(toJSON(data.frame(foo=c(pi, NA)), digits=5), equals("[{\"foo\":3.14159},{}]")); expect_that(toJSON(data.frame(foo=c(pi, NA)), na="string", digits=5), equals("[{\"foo\":3.14159},{\"foo\":\"NA\"}]")); expect_that(toJSON(data.frame(foo=c(pi, NA)), na="null", digits=5), equals("[{\"foo\":3.14159},{\"foo\":null}]")); }); jsonlite/inst/tests/testS4.R 0000644 0001762 0000144 00000000521 12540777273 015545 0 ustar ligges users # setClass( # Class="Trajectories", # 
representation=representation( # times = "numeric", # traj = "matrix" # ) # ); # # t1 = new(Class="Trajectories") # t2 = new(Class="Trajectories",times=c(1,3,4)) # t3 = new(Class="Trajectories",times=c(1,3),traj=matrix(1:4,ncol=2)) # # cat(asJSON(t3, pretty=T)) # cat(encode(t3, pretty=T)) jsonlite/inst/tests/test-toJSON-raw.R 0000644 0001762 0000144 00000000500 12540777273 017234 0 ustar ligges users context("toJSON raw") test_that("Encoding raw vector", { x <- list(myraw = charToRaw("bla")) x$mydf <- data.frame(foo=1:3) x$mydf$bar <- as.character.hexmode(charToRaw("bla")) y <- fromJSON(toJSON(x)) expect_that(x$mydf$bar, is_identical_to(y$mydf$bar)) expect_that(y$myraw, is_identical_to("Ymxh")) }); jsonlite/inst/tests/test-toJSON-complex.R 0000644 0001762 0000144 00000003122 12540777273 020115 0 ustar ligges users context("toJSON Complex") test_that("Encoding Complex", { expect_that(toJSON(complex(real=2, imaginary=2)), equals("[\"2+2i\"]")); expect_that(toJSON(complex(real=NA, imaginary=2)), equals("[\"NA\"]")); expect_that(toJSON(complex(real=1, imaginary=NA)), equals("[\"NA\"]")); expect_that(toJSON(complex(real=NA, imaginary=2), na="null"), equals("[null]")); }); test_that("Encoding Complex in Data Frame", { expect_that(toJSON(data.frame(foo=complex(real=1, imaginary=2))), equals("[{\"foo\":\"1+2i\"}]")); expect_that(toJSON(data.frame(foo=complex(real=NA, imaginary=2))), equals("[{}]")); expect_that(toJSON(data.frame(foo=complex(real=NA, imaginary=2)), na="string"), equals("[{\"foo\":\"NA\"}]")); expect_that(toJSON(data.frame(foo=complex(real=NA, imaginary=2)), na="null"), equals("[{\"foo\":null}]")); }); test_that("Encoding Complex as list", { x <- complex(real=c(1,2,NA), imaginary=3:1); expect_that(toJSON(x), equals("[\"1+3i\",\"2+2i\",\"NA\"]")); expect_that(toJSON(x, complex="list"), equals("{\"real\":[1,2,\"NA\"],\"imaginary\":[3,2,1]}")); expect_that(toJSON(data.frame(foo=x), complex="list"), 
equals("[{\"foo\":{\"real\":1,\"imaginary\":3}},{\"foo\":{\"real\":2,\"imaginary\":2}},{\"foo\":{\"imaginary\":1}}]")); expect_that(toJSON(data.frame(foo=x), complex="list", na="string"), equals("[{\"foo\":{\"real\":1,\"imaginary\":3}},{\"foo\":{\"real\":2,\"imaginary\":2}},{\"foo\":{\"real\":\"NA\",\"imaginary\":1}}]")); expect_that(toJSON(data.frame(foo=x), complex="list", dataframe="columns"), equals("{\"foo\":{\"real\":[1,2,\"NA\"],\"imaginary\":[3,2,1]}}")) }); jsonlite/inst/tests/test-fromJSON-datasets.R 0000644 0001762 0000144 00000001241 12540777273 020577 0 ustar ligges users context("fromJSON datasets") # Note about numeric precision # In the unit tests we use digits=10. Lowever values will result in problems for some datasets test_that("fromJSON datasets", { objects <- Filter(is.data.frame, lapply(ls("package:datasets"), get)); #data frames are never identical because: # - attributes # - factors, times, dates turn into strings # - integers turn into numeric lapply(objects, function(object){ newobject <- fromJSON(toJSON(object)) expect_that(newobject, is_a("data.frame")); expect_that(names(object), is_identical_to(names(newobject))); expect_that(nrow(object), is_identical_to(nrow(newobject))) }); }); jsonlite/inst/tests/test-libjson-validator.R 0000644 0001762 0000144 00000000744 12540777273 020766 0 ustar ligges users context("libjson Validator") test_that("test that the validator properly deals with escaped characters", { #create random strings mychars <- c('a', 'b', " ", '"', "\\", "\t", "\n", "'", "/", "#", "$"); createstring <- function(length){ paste(mychars[ceiling(runif(length, 0, length(mychars)))], collapse="") } for(i in 1:200){ #create some random strings to validate x <- createstring(i); expect_that(validate(toJSON(x)), is_true()); } }); jsonlite/inst/tests/test-toJSON-dataframe.R 0000644 0001762 0000144 00000001226 12540777273 020375 0 ustar ligges users context("toJSON Data Frame") test_that("data frame edge cases", { #unname named list 
test <- data.frame(foo=1:2) test$bar <- list(x=123, y=123) test$baz <- data.frame(z=456:457) expect_that(toJSON(test), equals('[{"foo":1,"bar":[123],"baz":{"z":456}},{"foo":2,"bar":[123],"baz":{"z":457}}]')) }); test_that("Nested structures", { mydata <- data.frame(row.names=1:2) mydata$d <- list( data.frame(a1=1:2, a2=3:4, a3=5:6, a4=7:8), data.frame(a1=11:12, a2=13:14, a3=15:16, a4=17:18) ) mydata$m <- list( matrix(1:6, nrow=2, ncol=3), matrix(6:1, nrow=2, ncol=3) ) expect_that(fromJSON(toJSON(mydata)), equals(mydata)); }); jsonlite/inst/tests/test-toJSON-NULL-values.R 0000644 0001762 0000144 00000001572 12540777273 020524 0 ustar ligges users context("toJSON NULL values") test_that("Test NULL values", { namedlist <- structure(list(), .Names = character(0)); x <- NULL y <- list(a=NULL, b=NA) z <- list(a=1, b=character(0)) expect_that(validate(toJSON(x)), is_true()) expect_that(fromJSON(toJSON(x)), equals(namedlist)) expect_that(toJSON(x), equals("{}")) expect_that(toJSON(x, null="list"), equals("{}")) expect_that(validate(toJSON(y)), is_true()) expect_that(toJSON(y, null="list"), equals("{\"a\":{},\"b\":[null]}")) expect_that(toJSON(y, null="null"), equals("{\"a\":null,\"b\":[null]}")) expect_that(fromJSON(toJSON(y, null="null")), equals(y)) expect_that(fromJSON(toJSON(y, null="list")), equals(list(a=namedlist, b=NA))) expect_that(validate(toJSON(z)), is_true()) expect_that(toJSON(z), equals("{\"a\":[1],\"b\":[]}")) expect_that(fromJSON(toJSON(z)), equals(list(a=1, b=list()))) }); jsonlite/inst/tests/issues.txt 0000644 0001762 0000144 00000000272 12540777273 016313 0 ustar ligges users #For timeseries, numeric precision can result in corrupt objects: out <- unserializeJSON(serializeJSON(AirPassengers, digits=5)) all.equal(out, AirPassengers, tolerance=1e-10) print(out) jsonlite/inst/tests/readme.txt 0000644 0001762 0000144 00000000313 12540777273 016231 0 ustar ligges users This dir contains unit tests for use with the testthat package. 
They are intended to be tested by a non-root user. To run them, install this package and run: library(testthat) test_package("jsonlite") jsonlite/inst/tests/test-libjson-large.R 0000644 0001762 0000144 00000000715 12540777273 020071 0 ustar ligges users context("libjson Large strings") test_that("escaping and parsing of special characters", { #create random strings mychars <- c('a', 'b', " ", '"', "\\", "\t", "\n", "'", "/", "#", "$"); createstring <- function(length){ paste(mychars[ceiling(runif(length, 0, length(mychars)))], collapse="") } #try some very long strings for(i in 1:10){ zz <- list(foo=createstring(1e5)) expect_that(zz, equals(fromJSON(toJSON(zz)))); } }); jsonlite/inst/tests/test-network-Github.R 0000644 0001762 0000144 00000005041 12540777273 020247 0 ustar ligges users context("Github API") test_that("Non Nested", { mydata <- fromJSON("https://api.github.com/users/hadley/orgs"); expect_that(mydata, is_a("data.frame")); }); test_that("Nested 1 Level", { mydata <- fromJSON("https://api.github.com/users/hadley/repos"); expect_that(mydata, is_a("data.frame")); expect_that(mydata$owner, is_a("data.frame")); expect_that(nrow(mydata), equals(nrow(mydata$owner))); }); test_that("Nested 1 Level", { mydata <- fromJSON("https://api.github.com/repos/hadley/ggplot2/issues"); expect_that(mydata, is_a("data.frame")); expect_that(mydata$user, is_a("data.frame")); expect_that(mydata$pull_request, is_a("data.frame")); expect_that(nrow(mydata), equals(nrow(mydata$pull_request))); }); test_that("Nested 1 Level within list", { mydata <- fromJSON("https://api.github.com/search/repositories?q=tetris+language:assembly&sort=stars&order=desc"); expect_that(mydata, is_a("list")); expect_that(mydata$items, is_a("data.frame")); expect_that(mydata$items$owner, is_a("data.frame")); expect_that(nrow(mydata$items), equals(nrow(mydata$items$owner))); }); test_that("Nested 2 Level", { mydata <- fromJSON("https://api.github.com/repos/hadley/ggplot2/commits"); expect_that(mydata, 
is_a("data.frame")); expect_that(mydata$commit, is_a("data.frame")); expect_that(mydata$commit$author, is_a("data.frame")); expect_that(mydata$commit$author$name, is_a("character")); expect_that(nrow(mydata), equals(nrow(mydata$commit))); expect_that(nrow(mydata), equals(nrow(mydata$commit$author))); }); test_that("Nested inconsistent (payload), one-to-many", { mydata <- fromJSON("https://api.github.com/users/hadley/events"); expect_that(mydata, is_a("data.frame")); expect_that(mydata$actor, is_a("data.frame")); expect_that(mydata$repo, is_a("data.frame")); expect_that(mydata$type, is_a("character")); expect_that(mydata$payload, is_a("data.frame")); #this is dynamic, depends on data if(any(mydata$type == "PushEvent")){ expect_that(all(vapply(mydata$payload$commits, function(x){is.null(x) || is.data.frame(x)}, logical(1))), is_true()); } }); test_that("Nested inconsistent (payload), one-to-many", { mydata <- fromJSON("https://api.github.com/repos/hadley/ggplot2/events"); if(any("ForkEvent" %in% mydata$type)){ expect_that(mydata$payload$forkee$owner, is_a("data.frame")) } if(any(mydata$type %in% c("IssuesEvent", "IssueCommentEvent"))){ expect_that(mydata$payload$issue, is_a("data.frame")); expect_that(mydata$payload$issue$user, is_a("data.frame")); } }); jsonlite/inst/tests/test-toJSON-zerovec.R 0000644 0001762 0000144 00000002553 12540777273 020132 0 ustar ligges users context("toJSON zerovec") test_that("Encoding Factor Objects", { expect_that(toJSON(character()), is_identical_to("[]")) expect_that(toJSON(logical()), is_identical_to("[]")) expect_that(toJSON(complex()), is_identical_to("[]")) expect_that(toJSON(complex(), complex="list"), is_identical_to("{\"real\":[],\"imaginary\":[]}")) expect_that(toJSON(double()), is_identical_to("[]")) expect_that(toJSON(integer()), is_identical_to("[]")) expect_that(toJSON(list()), is_identical_to("[]")) expect_that(toJSON(factor()), is_identical_to("[]")) expect_that(toJSON(factor(levels=c("foo", "bar"))), 
is_identical_to("[]")) expect_that(toJSON(matrix(nrow=0, ncol=0)), is_identical_to("[]")) expect_that(toJSON(as.matrix(numeric())), is_identical_to("[]")) expect_that(toJSON(data.frame()), is_identical_to("[]")) expect_that(toJSON(data.frame(foo=vector())), is_identical_to("[]")) expect_that(toJSON(data.frame(foo=vector(), bar=logical())), is_identical_to("[]")) expect_that(toJSON(Sys.time()[0], POSIXt="string"), is_identical_to("[]")) expect_that(toJSON(Sys.time()[0], POSIXt="epoch"), is_identical_to("[]")) expect_that(toJSON(Sys.time()[0], POSIXt="mongo"), is_identical_to("[]")) expect_that(toJSON(Sys.time()[0], POSIXt="ISO8601"), is_identical_to("[]")) expect_that(toJSON(as.Date(Sys.time())[0], POSIXt="ISO8601"), is_identical_to("[]")) }); jsonlite/inst/tests/test-fromJSON-matrix.R 0000644 0001762 0000144 00000002657 12540777273 020307 0 ustar ligges users context("fromJSON Matrix") # Note about numeric precision # In the unit tests we use digits=10. Lowever values will result in problems for some datasets test_that("fromJSON Matrix", { objects <- list( matrix(1), matrix(1:2), matrix(1:2, nrow=1), matrix(round(pi,2)), matrix(c(1,NA,2,NA), 2), volcano, matrix(NA) ); lapply(objects, function(object){ newobject <- fromJSON(toJSON(object)); expect_that(newobject, is_a("matrix")); expect_that(object, equals(newobject)); }); expect_that(fromJSON(toJSON(objects)), equals(objects)); }); test_that("fromJSON Matrix with simplifyMatrix=FALSE", { expect_that(fromJSON(toJSON(matrix(1)), simplifyMatrix=FALSE), equals(list(1))); expect_that(fromJSON(toJSON(matrix(1)), simplifyVector=FALSE), equals(list(list((1))))); expect_that(fromJSON(toJSON(matrix(NA)), simplifyMatrix=FALSE), equals(list(NA))); expect_that(fromJSON(toJSON(matrix(NA)), simplifyVector=FALSE), equals(list(list((NULL))))); }); test_that("fromJSON Matrix datasets", { objects <- Filter(is.matrix, lapply(ls("package:datasets"), get)); lapply(objects, function(object){ class(object) <- "matrix"; newobject <- 
fromJSON(toJSON(object, digits=4))
    expect_that(newobject, is_a("matrix"));
    expect_that(dim(newobject), equals(dim(object)));
    attributes(newobject) <- attributes(object);
    expect_that(newobject, equals(round(object,4)));
  });
});
jsonlite/inst/tests/test-toJSON-matrix.R 0000644 0001762 0000144 00000000600 12540777273 017750 0 ustar ligges users
context("toJSON Matrix")

#Matrices are encoded as an array of row arrays.
test_that("Encoding a Matrix", {
  expect_that(toJSON(matrix(1)), equals("[[1]]"));
  expect_that(toJSON(matrix(pi), digits=5), equals("[[3.14159]]"));
  expect_that(toJSON(matrix(1:2)), equals("[[1],[2]]"));
  expect_that(toJSON(matrix(1:2, nrow=1)), equals("[[1,2]]"));
  expect_that(toJSON(matrix(state.x77[1,1, drop=FALSE])), equals("[[3615]]"));
});
jsonlite/inst/tests/test-fromJSON-date.R 0000644 0001762 0000144 00000001467 12540777273 017716 0 ustar ligges users
context("fromJSON date objects")

#Round trip of POSIXct values through the "mongo" date format, both as a bare
#vector and as a data frame column, for each way of encoding the NA element.
test_that("fromJSON date objects", {
  x <- Sys.time() + c(1, 2, NA, 3)
  mydf <- data.frame(x=x)
  expect_that(fromJSON(toJSON(x, POSIXt="mongo")), is_a("POSIXct"))
  expect_that(fromJSON(toJSON(x, POSIXt="mongo")), equals(x))
  expect_that(fromJSON(toJSON(x, POSIXt="mongo", na="string")), is_a("POSIXct"))
  expect_that(fromJSON(toJSON(x, POSIXt="mongo", na="null")), is_a("POSIXct"))
  expect_that(fromJSON(toJSON(mydf, POSIXt="mongo")), is_a("data.frame"))
  expect_that(fromJSON(toJSON(mydf, POSIXt="mongo"))$x, is_a("POSIXct"))
  expect_that(fromJSON(toJSON(mydf, POSIXt="mongo", na="string"))$x, is_a("POSIXct"))
  expect_that(fromJSON(toJSON(mydf, POSIXt="mongo", na="null"))$x, is_a("POSIXct"))
  expect_that(fromJSON(toJSON(mydf, POSIXt="mongo"))$x, equals(x))
});
jsonlite/inst/tests/test-toJSON-POSIXt.R 0000644 0001762 0000144 00000006724 12540777273 017547 0 ustar ligges users
context("toJSON POSIXt")

#Fixtures: objects 1 and 2 carry no explicit time zone, objects 3 and 4 are
#pinned to Australia/Darwin.
objects <- list(
  as.POSIXlt("2013-06-17 22:33:44"),
  as.POSIXct("2013-06-17 22:33:44"),
  as.POSIXlt("2013-06-17 22:33:44", tz="Australia/Darwin"),
  as.POSIXct("2013-06-17 22:33:44", tz="Australia/Darwin")
)

test_that("Encoding POSIXt Objects", {
  #string based formats do not depend on the current local timezone
  invisible(lapply(objects, function(object){
    expect_that(toJSON(object), equals("[\"2013-06-17 22:33:44\"]"));
    expect_that(toJSON(object, POSIXt="string"), equals("[\"2013-06-17 22:33:44\"]"));
    expect_that(toJSON(object, POSIXt="ISO8601"), equals("[\"2013-06-17T22:33:44\"]"));
    expect_that(toJSON(object, POSIXt="sdfsdsdf"), throws_error("one of"));
  }));

  #objects 1 and 2 would result in a location specific epoch, so the numeric
  #formats are only checked for the objects with an explicit time zone
  invisible(lapply(objects[3:4], function(object){
    expect_that(toJSON(object, POSIXt="epoch"), equals("[1371474224000]"));
    expect_that(toJSON(object, POSIXt="mongo"), equals("[{\"$date\":1371474224000}]"));
  }));
});

test_that("Encoding POSIXt object in a list", {
  #string based formats do not depend on the current local timezone
  invisible(lapply(objects, function(object){
    expect_that(toJSON(list(foo=object)), equals("{\"foo\":[\"2013-06-17 22:33:44\"]}"));
    expect_that(toJSON(list(foo=object), POSIXt="string"), equals("{\"foo\":[\"2013-06-17 22:33:44\"]}"));
    expect_that(toJSON(list(foo=object), POSIXt="ISO8601"), equals("{\"foo\":[\"2013-06-17T22:33:44\"]}"));
    expect_that(toJSON(list(foo=object), POSIXt="sdfsdsdf"), throws_error("one of"));
  }));

  #list(foo=object) for objects 1 and 2 would result in a location specific epoch
  invisible(lapply(objects[3:4], function(object){
    expect_that(toJSON(list(foo=object), POSIXt="epoch"), equals("{\"foo\":[1371474224000]}"));
    expect_that(toJSON(list(foo=object), POSIXt="mongo"), equals("{\"foo\":[{\"$date\":1371474224000}]}"));
  }));
});

#This test was previously labelled "in a list" as well, which made the two
#tests indistinguishable in the test report; it exercises data frames.
test_that("Encoding POSIXt object in a data frame", {
  #string based formats do not depend on the current local timezone
  invisible(lapply(objects, function(object){
    expect_that(toJSON(data.frame(foo=object)), equals("[{\"foo\":\"2013-06-17 22:33:44\"}]"));
    expect_that(toJSON(data.frame(foo=object), POSIXt="string"), equals("[{\"foo\":\"2013-06-17 22:33:44\"}]"));
    expect_that(toJSON(data.frame(foo=object), POSIXt="ISO8601"), equals("[{\"foo\":\"2013-06-17T22:33:44\"}]"));
    expect_that(toJSON(data.frame(foo=object), POSIXt="sdfsdsdf"), throws_error("one of"));
  }));

  #data.frame(foo=object) for objects 1 and 2 would result in a location specific epoch
  invisible(lapply(objects[3:4], function(object){
    expect_that(toJSON(data.frame(foo=object), POSIXt="epoch"), equals("[{\"foo\":1371474224000}]"));
    expect_that(toJSON(data.frame(foo=object), POSIXt="mongo"), equals("[{\"foo\":{\"$date\":1371474224000}}]"));
  }));
});

test_that("POSIXt NA values", {
  #append a missing value to the two fixtures without an explicit time zone
  newobj <- list(
    c(objects[[1]], NA),
    c(objects[[2]], NA)
  );
  lapply(newobj, function(object){
    expect_that(toJSON(object), equals("[\"2013-06-17 22:33:44\",null]"));
    expect_that(toJSON(object, na="string"), equals("[\"2013-06-17 22:33:44\",\"NA\"]"));
    expect_that(toJSON(data.frame(foo=object)), equals("[{\"foo\":\"2013-06-17 22:33:44\"},{}]"));
    expect_that(toJSON(data.frame(foo=object), na="null"), equals("[{\"foo\":\"2013-06-17 22:33:44\"},{\"foo\":null}]"));
    expect_that(toJSON(data.frame(foo=object), na="string"), equals("[{\"foo\":\"2013-06-17 22:33:44\"},{\"foo\":\"NA\"}]"));
  });
});
jsonlite/inst/tests/test-toJSON-AsIs.R 0000644 0001762 0000144 00000001171 12540777273 017307 0 ustar ligges users
context("toJSON AsIs")

#Expected encodings for values wrapped in I() when auto_unbox=TRUE.
test_that("Encoding AsIs", {
  expect_that(toJSON(list(1), auto_unbox=TRUE), equals("[1]"));
  expect_that(toJSON(list(I(1)), auto_unbox=TRUE), equals("[[1]]"));
  expect_that(toJSON(I(list(1)), auto_unbox=TRUE), equals("[1]"));
  expect_that(toJSON(list(x=1)), equals("{\"x\":[1]}"));
  expect_that(toJSON(list(x=1), auto_unbox=TRUE), equals("{\"x\":1}"));
  expect_that(toJSON(list(x=I(1)), auto_unbox=TRUE), equals("{\"x\":[1]}"));
  expect_that(toJSON(list(x=I(list(1))), auto_unbox=TRUE), equals("{\"x\":[1]}"));
  expect_that(toJSON(list(x=list(I(1))), auto_unbox=TRUE), equals("{\"x\":[[1]]}"));
});
jsonlite/inst/tests/test-fromJSON-NA-values.R 0000644 0001762 0000144 00000001100 12540777273 020554 0 ustar ligges users
context("fromJSON NA values")

test_that("fromJSON NA values", {
  objects <- list(
    numbers = c(1,2, NA, NaN, Inf, -Inf, 3.14),
    logical = c(TRUE, FALSE, NA),
    #as.integer(1,2,3) ignores everything after its first argument and yields
    #1L, so the values have to be combined with c() to get a 3-element vector
    integers = as.integer(c(1,2,3)),
    num = 3.14,
    bool = FALSE,
    character = c("FOO","NA", NA, "NaN"),
    integer = 21L,
    boolNA = as.logical(NA),
    df = data.frame(foo=c(1,NA))
  )

  #test all but list
  lapply(objects, function(object){
    expect_that(fromJSON(toJSON(object)), equals(object))
  });

  #test all in list
  expect_that(fromJSON(toJSON(objects)), equals(objects))
});
jsonlite/inst/doc/ 0000755 0001762 0000144 00000000000 12626133701 013624 5 ustar ligges users
jsonlite/inst/doc/json-aaquickstart.R 0000644 0001762 0000144 00000003316 12573053677 017433 0 ustar ligges users
## ----echo=FALSE----------------------------------------------------------
library(knitr)
opts_chunk$set(comment="")

#this replaces tabs by spaces because latex-verbatim doesn't like tabs
#no longer needed because yajl does not use tabs.
#toJSON <- function(...){
# gsub("\t", " ", jsonlite::toJSON(...), fixed=TRUE);
#}

## ----message=FALSE-------------------------------------------------------
library(jsonlite)
all.equal(mtcars, fromJSON(toJSON(mtcars)))

## ------------------------------------------------------------------------
# A JSON array of primitives
json <- '["Mario", "Peach", null, "Bowser"]'

# Simplifies into an atomic vector
fromJSON(json)

## ------------------------------------------------------------------------
# No simplification:
fromJSON(json, simplifyVector = FALSE)

## ------------------------------------------------------------------------
json <- '[ {"Name" : "Mario", "Age" : 32, "Occupation" : "Plumber"}, {"Name" : "Peach", "Age" : 21, "Occupation" : "Princess"}, {}, {"Name" : "Bowser", "Occupation" : "Koopa"} ]'
mydf <- fromJSON(json)
mydf

## ------------------------------------------------------------------------
mydf$Ranking <- c(3, 1, 2, 4)
toJSON(mydf, pretty=TRUE)

## ------------------------------------------------------------------------
json <- '[ [1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12] ]'
mymatrix <- fromJSON(json)
mymatrix

## ------------------------------------------------------------------------
toJSON(mymatrix, pretty = TRUE)

## ------------------------------------------------------------------------
json <- '[ [[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]] ]'
myarray <- fromJSON(json)
myarray[1, , ]
myarray[ , ,1]
jsonlite/inst/doc/json-paging.html 0000644 0001762 0000144 00002403260 12573053677 016753 0 ustar ligges users
The jsonlite package is a JSON
parser/generator for R which is optimized for pipelines and web APIs. It is used by the OpenCPU system and many other packages to get data in and out of R using the JSON
format.
One of the main strengths of jsonlite
is that it implements a bidirectional mapping between JSON and data frames. Thereby it can convert nested collections of JSON records, as they often appear on the web, immediately into the appropriate R structure. For example to grab some data from ProPublica we can simply use:
library(jsonlite)
#address of the ProPublica example data
geos_url <- "https://projects.propublica.org/forensics/geos.json"
#download and parse it; flatten = TRUE is passed on to fromJSON
mydata <- fromJSON(geos_url, flatten = TRUE)
#open the result in the data viewer
View(mydata)
The mydata
object is a data frame which can be used directly for modeling or visualization, without the need for any further complicated data manipulation.
A question that comes up frequently is how to combine pages of data. Most web APIs limit the amount of data that can be retrieved per request. If the client needs more data than fits in a single request, it needs to break down the data into multiple requests that each retrieve a fragment (page) of data, not unlike pages in a book. In practice this is often implemented using a page
parameter in the API. Below is an example from the ProPublica Nonprofit Explorer API where we retrieve the first 3 pages of tax-exempt organizations in the USA, ordered by revenue:
baseurl <- "https://projects.propublica.org/nonprofits/api/v1/search.json?order=revenue&sort_order=desc"
#append the page parameter to the base query
page_url <- function(page) paste0(baseurl, "&page=", page)
mydata0 <- fromJSON(page_url(0), flatten = TRUE)
mydata1 <- fromJSON(page_url(1), flatten = TRUE)
mydata2 <- fromJSON(page_url(2), flatten = TRUE)
#The actual data is in the filings element
shown_columns <- c("organization.sub_name", "organization.city", "totrevenue")
mydata0$filings[1:10, shown_columns]
organization.sub_name organization.city
1 KAISER FOUNDATION HEALTH PLAN INC OAKLAND
2 KAISER FOUNDATION HEALTH PLAN INC OAKLAND
3 KAISER FOUNDATION HEALTH PLAN INC OAKLAND
4 DAVIDSON COUNTY COMMUNITY COLLEGE FOUNDATION INC LEXINGTON
5 KAISER FOUNDATION HOSPITALS OAKLAND
6 KAISER FOUNDATION HOSPITALS OAKLAND
7 KAISER FOUNDATION HOSPITALS OAKLAND
8 PARTNERS HEALTHCARE SYSTEM INC CHARLESTOWN
9 PARTNERS HEALTHCARE SYSTEM INC CHARLESTOWN
10 PARTNERS HEALTHCARE SYSTEM INC CHARLESTOWN
totrevenue
1 42346486950
2 40148558254
3 37786011714
4 30821445312
5 20013171194
6 18543043972
7 17980030355
8 10619215354
9 10452560305
10 9636630380
To analyze or visualize these data, we need to combine the pages into a single dataset. We can do this with the rbind.pages
function. Note that in this example, the actual data is contained in the filings
field:
#Rows per data frame
nrow(mydata0$filings)
[1] 25
#Take the filings table out of every page, then stack the tables
page_tables <- lapply(
  list(mydata0, mydata1, mydata2),
  function(page) page$filings
)
filings <- rbind.pages(page_tables)
#Total number of rows
nrow(filings)
[1] 75
We can write a simple loop that automatically downloads and combines many pages. For example, to retrieve the first 21 pages (pages 0 through 20) of non-profits from the example above:
#store all pages in a list first
baseurl <- "https://projects.propublica.org/nonprofits/api/v1/search.json?order=revenue&sort_order=desc"
#the API numbers its pages from 0
page_numbers <- 0:20
#reserve one slot per page up front instead of growing the list in the loop
pages <- vector("list", length(page_numbers))
for(k in seq_along(page_numbers)){
  i <- page_numbers[k]
  #announce the request before it is sent, so the log shows what is in progress
  message("Retrieving page ", i)
  mydata <- fromJSON(paste0(baseurl, "&page=", i))
  #wrap in list() so that a missing filings element leaves an empty slot
  #rather than removing the slot from the preallocated list
  pages[k] <- list(mydata$filings)
}
#combine all into one
filings <- rbind.pages(pages)
#check output
nrow(filings)
[1] 525
colnames(filings)
[1] "tax_prd" "tax_prd_yr"
[3] "formtype" "pdf_url"
[5] "updated" "totrevenue"
[7] "totfuncexpns" "totassetsend"
[9] "totliabend" "pct_compnsatncurrofcr"
[11] "tax_pd" "subseccd"
[13] "unrelbusinccd" "initiationfees"
[15] "grsrcptspublicuse" "grsincmembers"
[17] "grsincother" "totcntrbgfts"
[19] "totprgmrevnue" "invstmntinc"
[21] "txexmptbndsproceeds" "royaltsinc"
[23] "grsrntsreal" "grsrntsprsnl"
[25] "rntlexpnsreal" "rntlexpnsprsnl"
[27] "rntlincreal" "rntlincprsnl"
[29] "netrntlinc" "grsalesecur"
[31] "grsalesothr" "cstbasisecur"
[33] "cstbasisothr" "gnlsecur"
[35] "gnlsothr" "netgnls"
[37] "grsincfndrsng" "lessdirfndrsng"
[39] "netincfndrsng" "grsincgaming"
[41] "lessdirgaming" "netincgaming"
[43] "grsalesinvent" "lesscstofgoods"
[45] "netincsales" "miscrevtot11e"
[47] "compnsatncurrofcr" "othrsalwages"
[49] "payrolltx" "profndraising"
[51] "txexmptbndsend" "secrdmrtgsend"
[53] "unsecurednotesend" "retainedearnend"
[55] "totnetassetend" "nonpfrea"
[57] "gftgrntsrcvd170" "txrevnuelevied170"
[59] "srvcsval170" "grsinc170"
[61] "grsrcptsrelated170" "totgftgrntrcvd509"
[63] "grsrcptsadmissn509" "txrevnuelevied509"
[65] "srvcsval509" "subtotsuppinc509"
[67] "totsupp509" "ein"
[69] "organization" "eostatus"
[71] "tax_yr" "operatingcd"
[73] "assetcdgen" "transinccd"
[75] "subcd" "grscontrgifts"
[77] "intrstrvnue" "dividndsamt"
[79] "totexcapgn" "totexcapls"
[81] "grsprofitbus" "otherincamt"
[83] "compofficers" "contrpdpbks"
[85] "totrcptperbks" "totexpnspbks"
[87] "excessrcpts" "totexpnsexempt"
[89] "netinvstinc" "totaxpyr"
[91] "adjnetinc" "invstgovtoblig"
[93] "invstcorpstk" "invstcorpbnd"
[95] "totinvstsec" "fairmrktvalamt"
[97] "undistribincyr" "cmpmininvstret"
[99] "sec4940notxcd" "sec4940redtxcd"
[101] "infleg" "contractncd"
[103] "claimstatcd" "propexchcd"
[105] "brwlndmnycd" "furngoodscd"
[107] "paidcmpncd" "trnsothasstscd"
[109] "agremkpaycd" "undistrinccd"
[111] "dirindirintcd" "invstjexmptcd"
[113] "propgndacd" "excesshldcd"
[115] "grntindivcd" "nchrtygrntcd"
[117] "nreligiouscd" "grsrents"
[119] "costsold" "totrcptnetinc"
[121] "trcptadjnetinc" "topradmnexpnsa"
[123] "topradmnexpnsb" "topradmnexpnsd"
[125] "totexpnsnetinc" "totexpnsadjnet"
[127] "othrcashamt" "mrtgloans"
[129] "othrinvstend" "fairmrktvaleoy"
[131] "mrtgnotespay" "tfundnworth"
[133] "invstexcisetx" "sect511tx"
[135] "subtitleatx" "esttaxcr"
[137] "txwithldsrc" "txpaidf2758"
[139] "erronbkupwthld" "estpnlty"
[141] "balduopt" "crelamt"
[143] "tfairmrktunuse" "distribamt"
[145] "adjnetinccola" "adjnetinccolb"
[147] "adjnetinccolc" "adjnetinccold"
[149] "adjnetinctot" "qlfydistriba"
[151] "qlfydistribb" "qlfydistribc"
[153] "qlfydistribd" "qlfydistribtot"
[155] "valassetscola" "valassetscolb"
[157] "valassetscolc" "valassetscold"
[159] "valassetstot" "qlfyasseta"
[161] "qlfyassetb" "qlfyassetc"
[163] "qlfyassetd" "qlfyassettot"
[165] "endwmntscola" "endwmntscolb"
[167] "endwmntscolc" "endwmntscold"
[169] "endwmntstot" "totsuprtcola"
[171] "totsuprtcolb" "totsuprtcolc"
[173] "totsuprtcold" "totsuprttot"
[175] "pubsuprtcola" "pubsuprtcolb"
[177] "pubsuprtcolc" "pubsuprtcold"
[179] "pubsuprttot" "grsinvstinca"
[181] "grsinvstincb" "grsinvstincc"
[183] "grsinvstincd" "grsinvstinctot"
From here, we can go straight to analyzing the filings data without any further tedious data manipulation.
This section lists some examples of public HTTP APIs that publish data in JSON format. These are great to get a sense of the complex structures that are encountered in real world JSON data. All services are free, but some require registration/authentication. Each example returns lots of data, therefore not all output is printed in this document.
library(jsonlite)
Github is an online code repository and has APIs to get live data on almost all activity. Below are some examples from a well-known R package and author:
#organisations and repositories of one user
hadley_orgs <- fromJSON("https://api.github.com/users/hadley/orgs")
hadley_repos <- fromJSON("https://api.github.com/users/hadley/repos")
#commits and issues of one repository
gg_commits <- fromJSON("https://api.github.com/repos/hadley/ggplot2/commits")
gg_issues <- fromJSON("https://api.github.com/repos/hadley/ggplot2/issues")
#latest issues
issue_authors <- format(gg_issues$user$login)
paste(issue_authors, ":", gg_issues$title)
[1] "idavydov : annotate(\"segment\") wrong position if limits are inverted"
[2] "ben519 : geom_polygon doesn't make NA values grey when using continuous fill"
[3] "has2k1 : Fix multiple tiny issues in the position classes"
[4] "neggert : Problem with geom_bar position=fill and faceting"
[5] "robertzk : Fix typo in geom_linerange docs."
[6] "lionel- : stat_bar() gets confused with numeric discrete data?"
[7] "daattali : Request: support theme axis.ticks.length.x and axis.ticks.length.y"
[8] "sethchandler : Documentation error on %+replace% ?"
[9] "daattali : dev version 1.0.1.9003 has some breaking changes"
[10] "lionel- : Labels"
[11] "nutterb : legend for `geom_line` colour disappears when `alpha` < 1.0"
[12] "wch : scale_name property should be removed from Scale objects"
[13] "wch : scale_details arguments in Coords should be renamed panel_scales or scale"
[14] "wch : ScalesList-related functions should be moved into ggproto object"
[15] "wch : update_geom_defaults and update_stat_defaults should accept Geom and Stat objects"
[16] "wch : Make some ggproto objects immutable. Closes #1237"
[17] "and3k : Control size of the border and padding of geom_label"
[18] "hadley : Consistent argument order and formatting for layer functions"
[19] "hadley : Consistently handle missing values"
[20] "cmohamma : fortify causes fatal error"
[21] "lionel- : Flawed `label_bquote()` implementation"
[22] "beroe : Create alias for `colors=` in `scale_color_gradientn()`"
[23] "and3k : hjust broken in y facets"
[24] "joranE : Allow color bar guides for alpha scales"
[25] "hadley : dir = \"v\" also needs to swap nrow and ncol"
[26] "joranE : Add examples for removing guides"
[27] "lionel- : New approach for horizontal layers"
[28] "bbolker : add horizontal linerange geom"
[29] "hadley : Write vignette about grid"
[30] "hadley : Immutable flag for ggproto objects"
A single public API that shows location, status and current availability for all stations in the New York City bike sharing initiative.
#download the station feed
station_feed <- "http://citibikenyc.com/stations/json"
citibike <- fromJSON(station_feed)
#the station table is the stationBeanList element of the response
stations <- citibike$stationBeanList
colnames(stations)
[1] "id" "stationName"
[3] "availableDocks" "totalDocks"
[5] "latitude" "longitude"
[7] "statusValue" "statusKey"
[9] "availableBikes" "stAddress1"
[11] "stAddress2" "city"
[13] "postalCode" "location"
[15] "altitude" "testStation"
[17] "lastCommunicationTime" "landMark"
nrow(stations)
[1] 509
The Ergast Developer API is an experimental web service which provides a historical record of motor racing data for non-commercial purposes.
res <- fromJSON('http://ergast.com/api/f1/2004/1/results.json')
#step down the nested response one level at a time
races <- res$MRData$RaceTable$Races
first_results <- races$Results[[1]]
drivers <- first_results$Driver
colnames(drivers)
[1] "driverId" "code" "url" "givenName"
[5] "familyName" "dateOfBirth" "nationality" "permanentNumber"
drivers[1:10, c("givenName", "familyName", "code", "nationality")]
givenName familyName code nationality
1 Michael Schumacher MSC German
2 Rubens Barrichello BAR Brazilian
3 Fernando Alonso ALO Spanish
4 Ralf Schumacher SCH German
5 Juan Pablo Montoya MON Colombian
6 Jenson Button BUT British
7 Jarno Trulli TRU Italian
8 David Coulthard COU British
9 Takuma Sato SAT Japanese
10 Giancarlo Fisichella FIS Italian
Below is an example from the ProPublica Nonprofit Explorer API where we retrieve the first 11 pages (pages 0 through 10) of tax-exempt organizations in the USA, ordered by revenue. The rbind.pages
function is used to combine the pages into a single data frame.
#store all pages in a list first
baseurl <- "https://projects.propublica.org/nonprofits/api/v1/search.json?order=revenue&sort_order=desc"
#the API numbers its pages from 0
page_numbers <- 0:10
#reserve one slot per page up front instead of growing the list in the loop
pages <- vector("list", length(page_numbers))
for(k in seq_along(page_numbers)){
  i <- page_numbers[k]
  #announce the request before it is sent, so the log shows what is in progress
  message("Retrieving page ", i)
  mydata <- fromJSON(paste0(baseurl, "&page=", i), flatten=TRUE)
  #wrap in list() so that a missing filings element leaves an empty slot
  #rather than removing the slot from the preallocated list
  pages[k] <- list(mydata$filings)
}
#combine all into one
filings <- rbind.pages(pages)
#check output
nrow(filings)
[1] 275
filings[1:10, c("organization.sub_name", "organization.city", "totrevenue")]
organization.sub_name organization.city
1 KAISER FOUNDATION HEALTH PLAN INC OAKLAND
2 KAISER FOUNDATION HEALTH PLAN INC OAKLAND
3 KAISER FOUNDATION HEALTH PLAN INC OAKLAND
4 DAVIDSON COUNTY COMMUNITY COLLEGE FOUNDATION INC LEXINGTON
5 KAISER FOUNDATION HOSPITALS OAKLAND
6 KAISER FOUNDATION HOSPITALS OAKLAND
7 KAISER FOUNDATION HOSPITALS OAKLAND
8 PARTNERS HEALTHCARE SYSTEM INC CHARLESTOWN
9 PARTNERS HEALTHCARE SYSTEM INC CHARLESTOWN
10 PARTNERS HEALTHCARE SYSTEM INC CHARLESTOWN
totrevenue
1 42346486950
2 40148558254
3 37786011714
4 30821445312
5 20013171194
6 18543043972
7 17980030355
8 10619215354
9 10452560305
10 9636630380
The New York Times has several APIs as part of the NYT developer network. These interface to data from various departments, such as news articles, book reviews, real estate, etc. Registration is required (but free) and a key can be obtained here. The code below includes some example keys for illustration purposes.
#search for articles
article_key <- "&api-key=c2fede7bd9aea57c898f538e5ec0a1ee:6:68700045"
url <- "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=obamacare+socialism"
#the key is appended to the query as one more URL parameter
request_url <- paste0(url, article_key)
req <- fromJSON(request_url)
#the documents sit under response, then docs
articles <- req$response$docs
colnames(articles)
[1] "web_url" "snippet" "lead_paragraph"
[4] "abstract" "print_page" "blog"
[7] "source" "multimedia" "headline"
[10] "keywords" "pub_date" "document_type"
[13] "news_desk" "section_name" "subsection_name"
[16] "byline" "type_of_material" "_id"
[19] "word_count"
#search for best sellers
bestseller_key <- "&api-key=5e260a86a6301f55546c83a47d139b0d:3:68700045"
url <- "http://api.nytimes.com/svc/books/v2/lists/overview.json?published_date=2013-01-01"
request_url <- paste0(url, bestseller_key)
req <- fromJSON(request_url)
bestsellers <- req$results$list
#the books cell of the first row holds the table for that category
category1 <- bestsellers[[1, "books"]]
shown_columns <- c("author", "title", "publisher")
subset(category1, select = shown_columns)
author title publisher
1 Gillian Flynn GONE GIRL Crown Publishing
2 John Grisham THE RACKETEER Knopf Doubleday Publishing
3 E L James FIFTY SHADES OF GREY Knopf Doubleday Publishing
4 Nicholas Sparks SAFE HAVEN Grand Central Publishing
5 David Baldacci THE FORGOTTEN Grand Central Publishing
#movie reviews
movie_key <- "&api-key=5a3daaeee6bbc6b9df16284bc575e5ba:0:68700045"
url <- "http://api.nytimes.com/svc/movies/v2/reviews/dvd-picks.json?order=by-date"
request_url <- paste0(url, movie_key)
req <- fromJSON(request_url)
#the review table is the results element of the response
reviews <- req$results
colnames(reviews)
[1] "nyt_movie_id" "display_title" "sort_name"
[4] "mpaa_rating" "critics_pick" "thousand_best"
[7] "byline" "headline" "capsule_review"
[10] "summary_short" "publication_date" "opening_date"
[13] "dvd_release_date" "date_updated" "seo_name"
[16] "link" "related_urls" "multimedia"
reviews[1:5, c("display_title", "byline", "mpaa_rating")]
display_title byline mpaa_rating
1 Tom at the Farm Stephen Holden NR
2 A Little Chaos Stephen Holden R
3 Big Game Andy Webster PG13
4 Balls Out Andy Webster R
5 Mad Max: Fury Road A. O. Scott R
CrunchBase is the free database of technology companies, people, and investors that anyone can edit.
key <- "f6dv6cas5vw7arn5b9d7mdm3"
#the key goes at the end of the search query
search_url <- paste0("http://api.crunchbase.com/v/1/search.js?query=R&api_key=", key)
res <- fromJSON(search_url)
head(res$results)
The Sunlight Foundation is a non-profit that helps to make government transparent and accountable through data, tools, policy and journalism. Register for a free key here. An example key is provided.
key <- "&apikey=39c83d5a4acc42be993ee637e2e4ba3d"
#Find bills about drones
bills_url <- paste0("http://openstates.org/api/v1/bills/?q=drone", key)
drone_bills <- fromJSON(bills_url)
#cut the titles to 40 characters before printing
drone_bills$title <- substr(drone_bills$title, 1, 40)
shown_columns <- c("title", "state", "chamber", "type")
print(drone_bills[1:5, shown_columns])
title state chamber type
1 WILDLIFE-TECH il lower bill
2 Criminalizes the unlawful use of an unma ny lower bill
3 Criminalizes the unlawful use of an unma ny lower bill
4 Relating to: criminal procedure and prov wi lower bill
5 Relating to: criminal procedure and prov wi upper bill
#Congress mentioning "immigration" (the phrase passed in the query below)
res <- fromJSON(paste0("http://capitolwords.org/api/1/dates.json?phrase=immigration", key))
#the table to summarise is the results element of the response
wordcount <- res$results
#convert the day column with as.Date before summarising
wordcount$day <- as.Date(wordcount$day)
summary(wordcount)
count day raw_count
Min. : 1.00 Min. :1996-01-02 Min. : 1.00
1st Qu.: 3.00 1st Qu.:2001-01-22 1st Qu.: 3.00
Median : 8.00 Median :2005-11-16 Median : 8.00
Mean : 25.27 Mean :2005-10-02 Mean : 25.27
3rd Qu.: 21.00 3rd Qu.:2010-05-12 3rd Qu.: 21.00
Max. :1835.00 Max. :2015-08-05 Max. :1835.00
#Local legislators
locate_url <- paste0(
  "http://congress.api.sunlightfoundation.com/",
  "legislators/locate?latitude=42.96&longitude=-108.09",
  key
)
legislators <- fromJSON(locate_url)
shown_columns <- c("last_name", "chamber", "term_start", "twitter_id")
subset(legislators$results, select=shown_columns)
last_name chamber term_start twitter_id
1 Lummis house 2015-01-06 CynthiaLummis
2 Enzi senate 2015-01-06 SenatorEnzi
3 Barrasso senate 2013-01-03 SenJohnBarrasso
The twitter API requires OAuth2 authentication. Some example code:
#Create your own application key at https://dev.twitter.com/apps
#The two values below are example credentials for illustration only
consumer_key <- "EZRy5JzOH2QQmVAe9B4j2w"
consumer_secret <- "OIDC4MdfZJ82nbwpZfoUO4WOLTYjoRhpHRAWj6JMec"

#Use basic auth
library(httr)
#the basic-auth credential is the base64 encoding of "key:secret"
secret <- RCurl::base64(paste(consumer_key, consumer_secret, sep = ":"))
req <- POST("https://api.twitter.com/oauth2/token",
  add_headers(
    "Authorization" = paste("Basic", secret),
    "Content-Type" = "application/x-www-form-urlencoded;charset=UTF-8"
  ),
  body = "grant_type=client_credentials"
)
#stop here on an HTTP error instead of building a token from a failed response
stop_for_status(req)

#Extract the access token
token <- paste("Bearer", content(req)$access_token)

#Actual API call
url <- "https://api.twitter.com/1.1/statuses/user_timeline.json?count=10&screen_name=Rbloggers"
req <- GET(url, add_headers(Authorization = token))
stop_for_status(req)
#take the body as text and parse it with fromJSON
json <- content(req, as = "text")
tweets <- fromJSON(json)
#show the first 100 characters of each tweet
substring(tweets$text, 1, 100)
[1] "Analysing longitudinal data: Multilevel growth models (II) http://t.co/unUxszG7VJ #rstats"
[2] "RcppDE 0.1.4 http://t.co/3qPhFzoOpj #rstats"
[3] "Minimalist Maps http://t.co/fpkNznuCoX #rstats"
[4] "Tutorials freely available of course I taught: including ggplot2, dplyr and shiny http://t.co/WsxX4U"
[5] "Deploying Shiny apps with shinyapps.io http://t.co/tjef1pbKLt #rstats"
[6] "Bootstrap Evaluation of Clusters http://t.co/EbY7ziKCz5 #rstats"
[7] "Add external code to Rmarkdown http://t.co/RCJEmS8gyP #rstats"
[8] "Linear models with weighted observations http://t.co/pUoHpvxAGC #rstats"
[9] "dplyr 0.4.3 http://t.co/ze3zc8t7qj #rstats"
[10] "xkcd survey and the power to shape the internet http://t.co/vNaKhxWxE4 #rstats"