readstata13/0000755000176200001440000000000015002667742012377 5ustar liggesusersreadstata13/tests/0000755000176200001440000000000014372711643013540 5ustar liggesusersreadstata13/tests/testthat/0000755000176200001440000000000015002667742015401 5ustar liggesusersreadstata13/tests/testthat/test_read.R0000644000176200001440000002235215002626101017462 0ustar liggesuserslibrary(readstata13) context("Reading datasets") datacompare <- function(x, y) { res <- unlist(Map(all.equal, x, y)) # with all(unlist(res)) if not TRUE, a warning is thrown res <- all(unlist(lapply(res, isTRUE))) res } #### missings #### # missings.do creates missings.dta # missings.dta contains variable missings containing ., .a, .b, ..., .z # # Note: prior Stata 8 there was only a single missing value dd <- data.frame(missings = as.numeric(rep(NA, 27))) missings <- system.file("extdata", "missings.dta", package="readstata13") dd118 <- read.dta13(missings, missing.type = FALSE) dd118_m <- read.dta13(missings, missing.type = TRUE) mvals <- attr(dd118_m, "missing")$missings test_that("missings", { expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd118_m)) expect_identical(mvals, as.numeric(0:26)) }) # rm(list = files) #### missings msf/lsf #### dd <- data.frame(b = as.logical(c(1,NA)), i=as.integer(c(1,NA)), n=as.numeric(c(1,NA)), s=c("1", ""), stringsAsFactors = FALSE) dd$b <- as.integer(dd$b) missings_msf <- system.file("extdata", "missings_msf.dta", package="readstata13") missings_lsf <- system.file("extdata", "missings_lsf.dta", package="readstata13") dd_msf <- read.dta13(missings_msf) dd_lsf <- read.dta13(missings_lsf) test_that("missings msf/lsf", { expect_true(datacompare(dd, dd_msf)) expect_true(datacompare(dd, dd_lsf)) }) #### generate factors TRUE #### dd <- data.frame(v1 = as.numeric(1:2)) dd$v1 <- factor(x = dd$v1, levels = 1:2, labels = c("one", "2")) gen_fac <- system.file("extdata", "gen_fac.dta", package="readstata13") dd118 <- read.dta13(gen_fac, convert.factors = TRUE, 
generate.factors = TRUE) test_that("generate.factors TRUE", { expect_true(datacompare(dd, dd118)) }) # rm(list = files) #### generate factors FALSE #### dd <- data.frame(v1 = as.numeric(1:2)) gen_fac <- system.file("extdata", "gen_fac.dta", package="readstata13") suppressWarnings(dd118 <- read.dta13(gen_fac, convert.factors = TRUE, generate.factors = FALSE)) test_that("generate.factors TRUE", { expect_true(datacompare(dd, dd118)) }) #### convert.underscore = TRUE #### dd <- data.frame(v.1 = as.numeric(1:2), v.2 = as.numeric(1:2), long.name.multiple.underscores = as.numeric(1:2)) underscore <- system.file("extdata", "underscore.dta", package="readstata13") dd118 <- read.dta13(underscore, convert.underscore = T) test_that("generate.factors TRUE", { expect_true(datacompare(dd, dd118)) }) #### convert.underscore = FALSE #### dd <- data.frame(v.1 = as.numeric(1:2), v.2 = as.numeric(1:2), long_name_multiple_underscores = as.numeric(1:2)) underscore <- system.file("extdata", "underscore.dta", package="readstata13") dd118 <- read.dta13(underscore, convert.underscore = F) test_that("generate.factors TRUE", { expect_true(datacompare(dd, dd118)) }) #### noint.factors TRUE #### dd <- data.frame(v1 = as.numeric(1:2)) dd$v1 <- factor(x = dd$v1, levels = 1:2, labels = c("one", "1.2")) nonint <- system.file("extdata", "nonint.dta", package="readstata13") dd118 <- read.dta13(nonint, convert.factors = TRUE, generate.factors = TRUE, nonint.factors = TRUE) test_that("nonint.factors TRUE", { expect_true(datacompare(dd, dd118)) }) # rm(list = files) #### encoding TRUE #### umlauts <- c("ä","ö","ü","ß","€","Œ") Encoding(umlauts) <- "UTF-8" ddcp <- dd <- data.frame(num = factor(1:6, levels = 1:6, labels = umlauts), chr = umlauts, stringsAsFactors = FALSE) # Dataset in CP1252 levels(ddcp$num)[5:6] <- c("EUR","OE") ddcp$chr[5:6] <- c("EUR","OE") # Stata 14 encode <- system.file("extdata", "encode.dta", package="readstata13") # Stata 12 encodecp <- system.file("extdata", "encodecp.dta", 
package="readstata13") ddutf_aE <- read.dta13(encode, convert.factors = TRUE, generate.factors = TRUE, encoding="UTF-8") # On windows the last two characters will fail on default (not in latin1) dd_aE <- read.dta13(encode, convert.factors = TRUE, generate.factors = TRUE) ddcp_aE <- read.dta13(encodecp, convert.factors = TRUE, generate.factors = TRUE) test_that("encoding CP1252", { expect_true(datacompare(ddcp, ddcp_aE)) }) test_that("encoding UTF-8 (Stata 14)", { expect_true(datacompare(dd$chr[1:4], dd_aE$chr[1:4])) expect_true(datacompare(dd, ddutf_aE)) }) test_that("Reading of strls", { strl <- system.file("extdata", "statacar.dta", package="readstata13") ddstrlf <- read.dta13(strl, replace.strl = F) ddstrlfref <- paste0("11_", 1:8) expect_equal(ddstrlf$modelStrL, ddstrlfref) ddstrl <- read.dta13(strl, replace.strl = T) expect_equal(ddstrl$model, ddstrl$modelStrL) }) test_that("reading of many strls", { # slow test N = 1e4 big_strl <- data.frame( x = 1:N, y = sample(LETTERS, N, replace = TRUE), z = c(paste(rep("a", 3000), collapse=""), sample(LETTERS, N-1, replace=TRUE)) ) # writing the file is slow if (!file.exists("big_strl.dta")) readstata13::save.dta13(big_strl, "big_strl.dta") expect_silent(x <- readstata13::read.dta13("big_strl.dta", select.rows = 1)) unlink("big_strl.dta") }) test_that("various datetime conversions", { datetime <- system.file("extdata", "datetime.dta", package="readstata13") td <- c("2001-05-15", "1999-04-01", "1975-11-15", "1960-08-26", "1987-12-16") tc <- c("2011-06-25 05:15:06", "2011-03-13 08:30:45", "2011-04-09 10:17:08", "2012-02-11 10:30:12", "2012-08-01 06:45:59") tc_hh_mm <- c("2011-06-29 10:27:00", "2011-03-26 02:15:00", "2011-04-09 19:35:00", "2012-02-16 02:15:00", "2012-08-02 11:59:00") ty <- c("2011-01-01", "2011-01-01", "2011-01-01", "2012-01-01", "2012-01-01") tm <- c("2011-06-01", "2011-03-01", "2011-04-01", "2012-02-01", "2012-08-01") tq <- c("2011-04-01", "2011-01-01", "2011-04-01", "2012-01-01", "2012-07-01") dd <- 
data.frame(td = as.Date(td), tc = as.POSIXct(tc, tz = "GMT"), tc_hh_mm = as.POSIXct(tc_hh_mm, tz = "GMT"), ty = as.Date(ty), tm = as.Date(tm), tq = as.Date(tq)) dddates <- read.dta13(datetime, convert.dates = TRUE) expect_true(all.equal(dd, dddates, check.attributes = FALSE)) }) test_that("reading file format 120 works", { fl <- system.file("extdata", "myproject2.dtas", package="readstata13") tmp <- tempdir() fls <- unzip(fl, exdir = tmp) # data name, dta file name, dta version data_fram <- strsplit(readLines(fls[1])[-c(1:2)], " ") data_fram <- as.data.frame(do.call("rbind", data_fram)) expect_equal(data_fram$V1, c("persons", "counties")) # read dtas dtas <- fls[tools::file_ext(fls) == "dta"] expect_equal(basename(dtas), paste0(data_fram$V2, ".dta")) expect_warning( df1 <- read.dta13(dtas[1]), "File contains unhandled alias variable in column: 5" ) df2 <- read.dta13(dtas[2], convert.factors = FALSE) expect_equal(attr(df1, "version"), as.integer(data_fram$V3[1])) expect_equal(attr(df2, "version"), as.integer(data_fram$V3[2])) # backup order nams <- names(df1) # merge: fralias_from in attr(df1, "expansion.fields") tells what to merge df <- merge( df1[-which(names(df1) == "median")], df2, by = "countyid", all.x = TRUE ) # update names as_name <- attr(df1, "expansion.fields")[[16]] nams2 <- names(df) nams2[nams2 == as_name[3]] <- as_name[1] names(df) <- nams2 # resore expected order df <- df[nams] # restore order df <- df[order(df$personid), ] expect_equal( df$personid, 1:20 ) expect_equal( c("personid", "countyid", "income", "counties", "median", "ratio"), names(df) ) # read all frames in myproject2.dtas expect_warning( dtas1 <- read.dtas(fl), "File contains unhandled alias variable in column: 5") expect_equal( c("persons", "counties"), names(dtas1) ) # read selected frames expect_warning( dtas2 <- read.dtas(fl, select.frames = c("persons", "counties")), "File contains unhandled alias variable in column: 5") expect_equal( c("persons", "counties"), names(dtas2) ) # 
read only frame counties dtas3 <- read.dtas(fl, select.frames = c("counties")) expect_equal( "counties", names(dtas3) ) # read frames with different arguments dtas4 <- read.dtas(fl, read.dta13.options = list(counties = list(select.cols = "median_income"), persons = list(select.cols = "income"))) expect_equal(names(dtas4$persons), "income") expect_equal(names(dtas4$counties), "median_income") # read frames with different arguments dtas5 <- read.dtas(fl, read.dta13.options = list(persons = list(select.cols = c("income", "countyid")))) expect_equal(ncol(dtas5$persons), 2) expect_equal(names(dtas5$persons), c("countyid", "income")) }) readstata13/tests/testthat/data/0000755000176200001440000000000015002513375016302 5ustar liggesusersreadstata13/tests/testthat/data/dta_119.dta0000644000176200001440000001242715002513375020144 0ustar liggesusers
119LSF24 Apr 2025 21:54
'JIUx dat%9sGSOaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGSObbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbGSONA
readstata13/tests/testthat/data/dta_121.dta0000644000176200001440000001242715002513375020135 0ustar liggesusers
121LSF24 Apr 2025 21:54
'JIUx dat%9sGSOaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGSObbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbGSONA
readstata13/tests/testthat/data/dta_118.dta0000644000176200001440000001242115002513375020135 0ustar liggesusers
118LSF24 Apr 2025 21:54
%HCOrdat%9sGSOaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGSObbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbGSONA
readstata13/tests/testthat/data/dta_117.dta0000644000176200001440000001151015002513375020132 0ustar liggesusers
117LSF24 Apr 2025 21:54
Cy<Hdat%9sGSOaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGSObbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbGSONA
readstata13/tests/testthat/data/dta_120.dta0000644000176200001440000001242115002513375020126 0ustar liggesusers
120LSF24 Apr 2025 21:54
%HCOrdat%9sGSOaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGSObbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbGSONA
readstata13/tests/testthat/test_save.R0000644000176200001440000014401315002626101017504 0ustar liggesusers
library(readstata13)

context("Saving datasets")

# ToDo: Fix this.
# load(system.file("extdata/statacar.RData", package="readstata13"))
#
# saveandload <- function(x, ...) {
#   file <- tempfile(pattern="readstata13_", fileext=".dta")
#   save.dta13(x, file=file, ...)
#   all(unlist(Map(identical, x, read.dta13(file))))
# }
#
# test_that("Saved file is identical: Version 118", {
#   expect_true(saveandload(statacar, version="118", convert.factors=T))
# })

# Compare two data frames (or lists) column by column with all.equal().
#
# Returns a single TRUE when every pair of columns is all.equal(), FALSE
# otherwise. all.equal() returns TRUE on a match but a character description
# of the differences on a mismatch; passing that straight to all() raises
# "invalid 'type' (character) of argument" instead of producing FALSE.
# isTRUE() maps every non-TRUE result to FALSE, so a mismatch is reported as
# an ordinary failed expectation.
datacompare <- function(x, y) {
  all(vapply(Map(all.equal, x, y), isTRUE, logical(1)))
}

# TRUE when both objects carry exactly the same names in the same order.
namescompare <- function(x, y) {
  identical(names(x), names(y))
}

# Object names produced by the sections below; only referenced by the
# commented-out rm(list = files) clean-up calls.
files <- c("dd118", "dd117", "dd115", "dd114", "dd113", "dd112", "dd111",
           "dd110", "dd108", "dd107", "dd106", "dd105", "dd104", "dd103",
           "dd102", "dd")

data(mtcars)


#### version ####

# Every section writes into a scratch directory "data" below the working
# directory and removes it again once the files have been read back.
if (readstata13:::dir.exists13("data"))
  unlink("data", recursive = TRUE)
dir.create("data")

# dd carries an additional random Latin string column; dduni additionally
# carries a Unicode string column and is only written to formats 118-121.
dduni <- dd <- mtcars
dduni$char_latin <- dd$char_latin <- vapply(
  seq_len(nrow(dd)),
  function(i) paste0(sample(c(letters, LETTERS), 9), collapse = ""),
  character(1)
)
dduni$char_unicode <- rep(c("5ü省謹陵3_滑", "۱碌櫓۹ۘ戀"),
                          length.out = nrow(dduni))

save.dta13(dd, "data/dta_15mp.dta", version = "15mp")
save.dta13(dduni, "data/dta_121.dta", version = 121)
save.dta13(dduni, "data/dta_120.dta", version = 120)
save.dta13(dduni, "data/dta_119.dta", version = 119)
save.dta13(dduni, "data/dta_118.dta", version = 118)
save.dta13(dd, "data/dta_117.dta", version = 117)
save.dta13(dd, "data/dta_115.dta", version = 115)
save.dta13(dd, "data/dta_114.dta", version = 114)
save.dta13(dd, "data/dta_113.dta", version = 113)
save.dta13(dd, "data/dta_112.dta", version = 112)
save.dta13(dd, "data/dta_111.dta", version = 111)
save.dta13(dd, "data/dta_110.dta", version = 110)
save.dta13(dd, "data/dta_108.dta", version = 108)
save.dta13(dd, "data/dta_107.dta", version = 107)
save.dta13(dd, "data/dta_106.dta", version = 106)
save.dta13(dd, "data/dta_105.dta", version = 105)
save.dta13(dd, "data/dta_104.dta", version = 104)
save.dta13(dd, "data/dta_103.dta", version = 103)
save.dta13(dd, "data/dta_102.dta", version = 102)

dd15mp <- read.dta13("data/dta_15mp.dta")
dd121 <- read.dta13("data/dta_121.dta")
dd120 <- read.dta13("data/dta_120.dta")
dd119 <- read.dta13("data/dta_119.dta")
dd118 <- read.dta13("data/dta_118.dta")
dd117 <- read.dta13("data/dta_117.dta")
dd115 <- read.dta13("data/dta_115.dta")
dd114 <- read.dta13("data/dta_114.dta")
dd113 <- read.dta13("data/dta_113.dta")
dd112 <- read.dta13("data/dta_112.dta")
dd111 <- read.dta13("data/dta_111.dta")
dd110 <- read.dta13("data/dta_110.dta")
dd108 <- read.dta13("data/dta_108.dta")
dd107 <- read.dta13("data/dta_107.dta")
dd106 <- read.dta13("data/dta_106.dta")
dd105 <- read.dta13("data/dta_105.dta")
dd104 <- read.dta13("data/dta_104.dta")
dd103 <- read.dta13("data/dta_103.dta")
dd102 <- read.dta13("data/dta_102.dta")

# remove the scratch directory
unlink("data", recursive = TRUE)

test_that("version", {
  expect_true(datacompare(dd, dd15mp))
  expect_true(datacompare(dduni, dd121))
  expect_true(datacompare(dduni, dd120))
  expect_true(datacompare(dduni, dd119))
  expect_true(datacompare(dduni, dd118))
  expect_true(datacompare(dd, dd117))
  expect_true(datacompare(dd, dd115))
  expect_true(datacompare(dd, dd114))
  expect_true(datacompare(dd, dd113))
  expect_true(datacompare(dd, dd112))
  expect_true(datacompare(dd, dd111))
  expect_true(datacompare(dd, dd110))
  expect_true(datacompare(dd, dd108))
  expect_true(datacompare(dd, dd107))
  expect_true(datacompare(dd, dd106))
  expect_true(datacompare(dd, dd105))
  expect_true(datacompare(dd, dd104))
  expect_true(datacompare(dd, dd103))
  expect_true(datacompare(dd, dd102))
})

# rm(list = files)


#### compress ####

if (readstata13:::dir.exists13("data"))
  unlink("data", recursive = TRUE)
dir.create("data")

dd <- mtcars

save.dta13(dd, "data/dta_121.dta", version = 121, compress = TRUE)
save.dta13(dd, "data/dta_120.dta", version = 120, compress = TRUE)
save.dta13(dd, "data/dta_119.dta", version = 119, compress = TRUE)
save.dta13(dd, "data/dta_118.dta", version = 118, compress = TRUE)
save.dta13(dd, "data/dta_117.dta", version = 117, compress = TRUE)
save.dta13(dd, "data/dta_115.dta", version = 115, compress = TRUE)
save.dta13(dd, "data/dta_114.dta", version = 114, compress = TRUE)
save.dta13(dd, "data/dta_113.dta", version = 113, compress = TRUE)
save.dta13(dd, "data/dta_112.dta", version = 112, compress = TRUE)
save.dta13(dd, "data/dta_111.dta", version = 111, compress = TRUE)
save.dta13(dd, "data/dta_110.dta", version = 110, compress = TRUE)
save.dta13(dd, "data/dta_108.dta", version = 108, compress = TRUE)
save.dta13(dd, "data/dta_107.dta", version = 107, compress = TRUE)
save.dta13(dd, "data/dta_106.dta", version = 106, compress = TRUE)
save.dta13(dd, "data/dta_105.dta", version = 105, compress = TRUE)
save.dta13(dd, "data/dta_104.dta", version = 104, compress = TRUE)
save.dta13(dd, "data/dta_103.dta", version = 103, compress = TRUE)
save.dta13(dd, "data/dta_102.dta", version = 102, compress = TRUE)

dd121 <- read.dta13("data/dta_121.dta")
dd120 <- read.dta13("data/dta_120.dta")
dd119 <- read.dta13("data/dta_119.dta")
dd118 <- read.dta13("data/dta_118.dta")
dd117 <- read.dta13("data/dta_117.dta")
dd115 <- read.dta13("data/dta_115.dta")
dd114 <- read.dta13("data/dta_114.dta")
dd113 <- read.dta13("data/dta_113.dta")
dd112 <- read.dta13("data/dta_112.dta")
dd111 <- read.dta13("data/dta_111.dta")
dd110 <- read.dta13("data/dta_110.dta")
dd108 <- read.dta13("data/dta_108.dta")
dd107 <- read.dta13("data/dta_107.dta")
dd106 <- read.dta13("data/dta_106.dta")
dd105 <- read.dta13("data/dta_105.dta")
dd104 <- read.dta13("data/dta_104.dta")
dd103 <- read.dta13("data/dta_103.dta")
dd102 <- read.dta13("data/dta_102.dta")

# remove the scratch directory
unlink("data", recursive = TRUE)

test_that("compress", {
  expect_true(datacompare(dd, dd121))
  expect_true(datacompare(dd, dd120))
  expect_true(datacompare(dd, dd119))
  expect_true(datacompare(dd, dd118))
  expect_true(datacompare(dd, dd117))
  expect_true(datacompare(dd, dd115))
  expect_true(datacompare(dd, dd114))
  expect_true(datacompare(dd, dd113))
  expect_true(datacompare(dd, dd112))
  expect_true(datacompare(dd, dd111))
  expect_true(datacompare(dd, dd110))
  expect_true(datacompare(dd, dd108))
  expect_true(datacompare(dd, dd107))
  expect_true(datacompare(dd, dd106))
  expect_true(datacompare(dd, dd105))
  expect_true(datacompare(dd, dd104))
  expect_true(datacompare(dd, dd103))
  expect_true(datacompare(dd, dd102))
})

# rm(list = files)


#### convert.factors TRUE ####

if (readstata13:::dir.exists13("data"))
  unlink("data", recursive = TRUE)
dir.create("data")

dd <- mtcars
dd$am <- factor(x = dd$am, levels = c(0,1), labels = c("auto", "man"))

# Formats 102-106 are left out of this section (the disabled lines below are
# annotated "no factors").
save.dta13(dd, "data/dta_121.dta", version = 121, convert.factors = TRUE)
save.dta13(dd, "data/dta_120.dta", version = 120, convert.factors = TRUE)
save.dta13(dd, "data/dta_119.dta", version = 119, convert.factors = TRUE)
save.dta13(dd, "data/dta_118.dta", version = 118, convert.factors = TRUE)
save.dta13(dd, "data/dta_117.dta", version = 117, convert.factors = TRUE)
save.dta13(dd, "data/dta_115.dta", version = 115, convert.factors = TRUE)
save.dta13(dd, "data/dta_114.dta", version = 114, convert.factors = TRUE)
save.dta13(dd, "data/dta_113.dta", version = 113, convert.factors = TRUE)
save.dta13(dd, "data/dta_112.dta", version = 112, convert.factors = TRUE)
save.dta13(dd, "data/dta_111.dta", version = 111, convert.factors = TRUE)
save.dta13(dd, "data/dta_110.dta", version = 110, convert.factors = TRUE)
save.dta13(dd, "data/dta_108.dta", version = 108, convert.factors = TRUE)
save.dta13(dd, "data/dta_107.dta", version = 107, convert.factors = TRUE)
# save.dta13(dd, "data/dta_106.dta", version = 106, convert.factors = TRUE)
# save.dta13(dd, "data/dta_105.dta", version = 105, convert.factors = TRUE)
# save.dta13(dd, "data/dta_104.dta", version = 104, convert.factors = TRUE)
# save.dta13(dd, "data/dta_103.dta", version = 103, convert.factors = TRUE)
# save.dta13(dd, "data/dta_102.dta", version = 102, convert.factors = TRUE)

dd121 <- read.dta13("data/dta_121.dta")
dd120 <- read.dta13("data/dta_120.dta")
dd119 <- read.dta13("data/dta_119.dta")
dd118 <- read.dta13("data/dta_118.dta")
dd117 <- read.dta13("data/dta_117.dta")
dd115 <- read.dta13("data/dta_115.dta")
dd114 <- read.dta13("data/dta_114.dta")
dd113 <- read.dta13("data/dta_113.dta")
dd112 <- read.dta13("data/dta_112.dta")
dd111 <- read.dta13("data/dta_111.dta")
dd110 <- read.dta13("data/dta_110.dta")
dd108 <- read.dta13("data/dta_108.dta")
dd107 <- read.dta13("data/dta_107.dta")
# dd106 <- read.dta13("data/dta_106.dta")
# dd105 <- read.dta13("data/dta_105.dta") no factors
# dd104 <- read.dta13("data/dta_104.dta")
# dd103 <- read.dta13("data/dta_103.dta")
# dd102 <- read.dta13("data/dta_102.dta")

# remove the scratch directory
unlink("data", recursive = TRUE)

test_that("convert.factors TRUE", {
  expect_true(datacompare(dd, dd121))
  expect_true(datacompare(dd, dd120))
  expect_true(datacompare(dd, dd119))
  expect_true(datacompare(dd, dd118))
  expect_true(datacompare(dd, dd117))
  expect_true(datacompare(dd, dd115))
  expect_true(datacompare(dd, dd114))
  expect_true(datacompare(dd, dd113))
  expect_true(datacompare(dd, dd112))
  expect_true(datacompare(dd, dd111))
  expect_true(datacompare(dd, dd110))
  expect_true(datacompare(dd, dd108))
  expect_true(datacompare(dd, dd107))
  # expect_true(datacompare(dd, dd106))
  # expect_true(datacompare(dd, dd105)) no factors
  # expect_true(datacompare(dd, dd104))
  # expect_true(datacompare(dd, dd103))
  # expect_true(datacompare(dd, dd102))
})

# rm(list = files)


#### convert.factors FALSE ####

if (readstata13:::dir.exists13("data"))
  unlink("data", recursive = TRUE)
dir.create("data")

dd <- mtcars
dd$am <- factor(x = dd$am, levels = c(0,1), labels = c("auto", "man"))

save.dta13(dd, "data/dta_121.dta", version = 121, convert.factors = FALSE)
save.dta13(dd, "data/dta_120.dta", version = 120, convert.factors = FALSE)
save.dta13(dd, "data/dta_119.dta", version = 119, convert.factors = FALSE)
save.dta13(dd, "data/dta_118.dta", version = 118, convert.factors = FALSE)
save.dta13(dd, "data/dta_117.dta", version = 117, convert.factors = FALSE)
save.dta13(dd, "data/dta_115.dta", version = 115, convert.factors = FALSE)
save.dta13(dd, "data/dta_114.dta", version = 114, convert.factors = FALSE)
save.dta13(dd, "data/dta_113.dta", version = 113, convert.factors = FALSE)
save.dta13(dd, "data/dta_112.dta", version = 112, convert.factors = FALSE)
save.dta13(dd, "data/dta_111.dta", version = 111, convert.factors = FALSE)
save.dta13(dd, "data/dta_110.dta", version = 110, convert.factors = FALSE)
save.dta13(dd, "data/dta_108.dta", version = 108, convert.factors = FALSE)
save.dta13(dd, "data/dta_107.dta", version = 107, convert.factors = FALSE)
# save.dta13(dd, "data/dta_106.dta", version = 106, convert.factors = FALSE)
# save.dta13(dd, "data/dta_105.dta", version = 105, convert.factors = FALSE) # no factors | expect_warning ?
# save.dta13(dd, "data/dta_104.dta", version = 104, convert.factors = FALSE)
# save.dta13(dd, "data/dta_103.dta", version = 103, convert.factors = FALSE)
# save.dta13(dd, "data/dta_102.dta", version = 102, convert.factors = FALSE)

dd121 <- read.dta13("data/dta_121.dta")
dd120 <- read.dta13("data/dta_120.dta")
dd119 <- read.dta13("data/dta_119.dta")
dd118 <- read.dta13("data/dta_118.dta")
dd117 <- read.dta13("data/dta_117.dta")
dd115 <- read.dta13("data/dta_115.dta")
dd114 <- read.dta13("data/dta_114.dta")
dd113 <- read.dta13("data/dta_113.dta")
dd112 <- read.dta13("data/dta_112.dta")
dd111 <- read.dta13("data/dta_111.dta")
dd110 <- read.dta13("data/dta_110.dta")
dd108 <- read.dta13("data/dta_108.dta")
dd107 <- read.dta13("data/dta_107.dta")
# dd106 <- read.dta13("data/dta_106.dta")
# dd105 <- read.dta13("data/dta_105.dta") no factors | expect_warning ?
# dd104 <- read.dta13("data/dta_104.dta") # dd103 <- read.dta13("data/dta_103.dta") # dd102 <- read.dta13("data/dta_102.dta") # add one (because of stupid factor) dd <- mtcars dd$am <- dd$am + 1 # rm -r unlink("data", recursive = TRUE) test_that("convert.factors TRUE", { expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) # expect_true(datacompare(dd, dd106)) # expect_true(datacompare(dd, dd105)) no factors # expect_true(datacompare(dd, dd104)) # expect_true(datacompare(dd, dd103)) # expect_true(datacompare(dd, dd102)) }) # rm(list = files) #### add rownames TRUE #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- mtcars save.dta13(dd, "data/dta_121.dta", version = 121, add.rownames = TRUE) save.dta13(dd, "data/dta_120.dta", version = 120, add.rownames = TRUE) save.dta13(dd, "data/dta_119.dta", version = 119, add.rownames = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, add.rownames = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, add.rownames = TRUE) save.dta13(dd, "data/dta_115.dta", version = 115, add.rownames = TRUE) save.dta13(dd, "data/dta_114.dta", version = 114, add.rownames = TRUE) save.dta13(dd, "data/dta_113.dta", version = 113, add.rownames = TRUE) save.dta13(dd, "data/dta_112.dta", version = 112, add.rownames = TRUE) save.dta13(dd, "data/dta_111.dta", version = 111, add.rownames = TRUE) save.dta13(dd, "data/dta_110.dta", version = 110, add.rownames = TRUE) save.dta13(dd, "data/dta_108.dta", version = 108, add.rownames = TRUE) save.dta13(dd, "data/dta_107.dta", version = 107, 
add.rownames = TRUE) save.dta13(dd, "data/dta_106.dta", version = 106, add.rownames = TRUE) save.dta13(dd, "data/dta_105.dta", version = 105, add.rownames = TRUE) save.dta13(dd, "data/dta_104.dta", version = 104, add.rownames = TRUE) save.dta13(dd, "data/dta_103.dta", version = 103, add.rownames = TRUE) save.dta13(dd, "data/dta_102.dta", version = 102, add.rownames = TRUE) dd121 <- read.dta13("data/dta_121.dta", add.rownames = TRUE) dd120 <- read.dta13("data/dta_120.dta", add.rownames = TRUE) dd119 <- read.dta13("data/dta_119.dta", add.rownames = TRUE) dd118 <- read.dta13("data/dta_118.dta", add.rownames = TRUE) dd117 <- read.dta13("data/dta_117.dta", add.rownames = TRUE) dd115 <- read.dta13("data/dta_115.dta", add.rownames = TRUE) dd114 <- read.dta13("data/dta_114.dta", add.rownames = TRUE) dd113 <- read.dta13("data/dta_113.dta", add.rownames = TRUE) dd112 <- read.dta13("data/dta_112.dta", add.rownames = TRUE) dd111 <- read.dta13("data/dta_111.dta", add.rownames = TRUE) dd110 <- read.dta13("data/dta_110.dta", add.rownames = TRUE) dd108 <- read.dta13("data/dta_108.dta", add.rownames = TRUE) dd107 <- read.dta13("data/dta_107.dta", add.rownames = TRUE) dd106 <- read.dta13("data/dta_106.dta", add.rownames = TRUE) dd105 <- read.dta13("data/dta_105.dta", add.rownames = TRUE) dd104 <- read.dta13("data/dta_104.dta", add.rownames = TRUE) dd103 <- read.dta13("data/dta_103.dta", add.rownames = TRUE) dd102 <- read.dta13("data/dta_102.dta", add.rownames = TRUE) # rm -r unlink("data", recursive = TRUE) test_that("add.rownames TRUE", { # Check that rownames are identical expect_true(identical(rownames(dd), rownames(dd121))) expect_true(identical(rownames(dd), rownames(dd120))) expect_true(identical(rownames(dd), rownames(dd119))) expect_true(identical(rownames(dd), rownames(dd118))) expect_true(identical(rownames(dd), rownames(dd117))) expect_true(identical(rownames(dd), rownames(dd115))) expect_true(identical(rownames(dd), rownames(dd114))) expect_true(identical(rownames(dd), 
rownames(dd113))) expect_true(identical(rownames(dd), rownames(dd112))) expect_true(identical(rownames(dd), rownames(dd111))) expect_true(identical(rownames(dd), rownames(dd110))) expect_true(identical(rownames(dd), rownames(dd108))) expect_true(identical(rownames(dd), rownames(dd107))) expect_true(identical(rownames(dd), rownames(dd106))) expect_true(identical(rownames(dd), rownames(dd105))) expect_true(identical(rownames(dd), rownames(dd104))) expect_true(identical(rownames(dd), rownames(dd103))) expect_true(identical(rownames(dd), rownames(dd102))) # Check that data is identical expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) expect_true(datacompare(dd, dd106)) expect_true(datacompare(dd, dd105)) expect_true(datacompare(dd, dd104)) expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) }) # rm(list = files) #### data label TRUE #### dl <- "mtcars data file" if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- mtcars save.dta13(dd, "data/dta_121.dta", version = 121, data.label = dl) save.dta13(dd, "data/dta_120.dta", version = 120, data.label = dl) save.dta13(dd, "data/dta_119.dta", version = 119, data.label = dl) save.dta13(dd, "data/dta_118.dta", version = 118, data.label = dl) save.dta13(dd, "data/dta_117.dta", version = 117, data.label = dl) save.dta13(dd, "data/dta_115.dta", version = 115, data.label = dl) save.dta13(dd, "data/dta_114.dta", version = 114, data.label = dl) save.dta13(dd, "data/dta_113.dta", version = 113, data.label = dl) save.dta13(dd, "data/dta_112.dta", version = 
112, data.label = dl) save.dta13(dd, "data/dta_111.dta", version = 111, data.label = dl) save.dta13(dd, "data/dta_110.dta", version = 110, data.label = dl) save.dta13(dd, "data/dta_108.dta", version = 108, data.label = dl) save.dta13(dd, "data/dta_107.dta", version = 107, data.label = dl) save.dta13(dd, "data/dta_106.dta", version = 106, data.label = dl) save.dta13(dd, "data/dta_105.dta", version = 105, data.label = dl) save.dta13(dd, "data/dta_104.dta", version = 104, data.label = dl) save.dta13(dd, "data/dta_103.dta", version = 103, data.label = dl) # save.dta13(dd, "data/dta_102.dta", version = 102, data.label = dl) # no data label dd121 <- read.dta13("data/dta_121.dta") dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") dd115 <- read.dta13("data/dta_115.dta") dd114 <- read.dta13("data/dta_114.dta") dd113 <- read.dta13("data/dta_113.dta") dd112 <- read.dta13("data/dta_112.dta") dd111 <- read.dta13("data/dta_111.dta") dd110 <- read.dta13("data/dta_110.dta") dd108 <- read.dta13("data/dta_108.dta") dd107 <- read.dta13("data/dta_107.dta") dd106 <- read.dta13("data/dta_106.dta") dd105 <- read.dta13("data/dta_105.dta") dd104 <- read.dta13("data/dta_104.dta") dd103 <- read.dta13("data/dta_103.dta") # dd102 <- read.dta13("data/dta_102.dta") unlink("data", recursive = TRUE) test_that("data label", { # Check that rownames are identical expect_equal(dl, attr(dd121, "datalabel")) expect_equal(dl, attr(dd120, "datalabel")) expect_equal(dl, attr(dd119, "datalabel")) expect_equal(dl, attr(dd118, "datalabel")) expect_equal(dl, attr(dd117, "datalabel")) expect_equal(dl, attr(dd115, "datalabel")) expect_equal(dl, attr(dd114, "datalabel")) expect_equal(dl, attr(dd113, "datalabel")) expect_equal(dl, attr(dd112, "datalabel")) expect_equal(dl, attr(dd111, "datalabel")) expect_equal(dl, attr(dd110, "datalabel")) expect_equal(dl, attr(dd108, "datalabel")) expect_equal(dl, 
attr(dd107, "datalabel")) expect_equal(dl, attr(dd106, "datalabel")) expect_equal(dl, attr(dd105, "datalabel")) expect_equal(dl, attr(dd104, "datalabel")) expect_equal(dl, attr(dd103, "datalabel")) # expect_equal(dl, attr(dd102, "datalabel")) }) # rm(list = files) #### convert dates TRUE #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") td <- c("2001-05-15", "1999-04-01", "1975-11-15", "1960-08-26", "1987-12-16") tc <- c("2011-06-25 05:15:06", "2011-03-13 08:30:45", "2011-04-09 10:17:08", "2012-02-11 10:30:12", "2012-08-01 06:45:59") tc_hh_mm <- c("2011-06-29 10:27:00", "2011-03-26 02:15:00", "2011-04-09 19:35:00", "2012-02-16 02:15:00", "2012-08-02 11:59:00") ty <- c("2011-01-01", "2011-01-01", "2011-01-01", "2012-01-01", "2012-01-01") tm <- c("2011-06-01", "2011-03-01", "2011-04-01", "2012-02-01", "2012-08-01") tq <- c("2011-04-01", "2011-01-01", "2011-04-01", "2012-01-01", "2012-07-01") dd <- data.frame(td = as.Date(td), tc = as.POSIXct(tc, tz = "GMT"), tc_hh_mm = as.POSIXct(tc_hh_mm, tz = "GMT"), ty = as.Date(ty), tm = as.Date(tm), tq = as.Date(tq)) save.dta13(dd, "data/dta_121.dta", version = 121, convert.dates = TRUE) save.dta13(dd, "data/dta_120.dta", version = 120, convert.dates = TRUE) save.dta13(dd, "data/dta_119.dta", version = 119, convert.dates = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, convert.dates = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, convert.dates = TRUE) save.dta13(dd, "data/dta_115.dta", version = 115, convert.dates = TRUE) save.dta13(dd, "data/dta_114.dta", version = 114, convert.dates = TRUE) save.dta13(dd, "data/dta_113.dta", version = 113, convert.dates = TRUE) save.dta13(dd, "data/dta_112.dta", version = 112, convert.dates = TRUE) save.dta13(dd, "data/dta_111.dta", version = 111, convert.dates = TRUE) save.dta13(dd, "data/dta_110.dta", version = 110, convert.dates = TRUE) save.dta13(dd, "data/dta_108.dta", version = 108, convert.dates = TRUE) save.dta13(dd, 
"data/dta_107.dta", version = 107, convert.dates = TRUE) save.dta13(dd, "data/dta_106.dta", version = 106, convert.dates = TRUE) save.dta13(dd, "data/dta_105.dta", version = 105, convert.dates = TRUE) save.dta13(dd, "data/dta_104.dta", version = 104, convert.dates = TRUE) save.dta13(dd, "data/dta_103.dta", version = 103, convert.dates = TRUE) save.dta13(dd, "data/dta_102.dta", version = 102, convert.dates = TRUE) dd121 <- read.dta13("data/dta_121.dta") dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") dd115 <- read.dta13("data/dta_115.dta") dd114 <- read.dta13("data/dta_114.dta") dd113 <- read.dta13("data/dta_113.dta") dd112 <- read.dta13("data/dta_112.dta") dd111 <- read.dta13("data/dta_111.dta") dd110 <- read.dta13("data/dta_110.dta") dd108 <- read.dta13("data/dta_108.dta") dd107 <- read.dta13("data/dta_107.dta") dd106 <- read.dta13("data/dta_106.dta") dd105 <- read.dta13("data/dta_105.dta") dd104 <- read.dta13("data/dta_104.dta") dd103 <- read.dta13("data/dta_103.dta") dd102 <- read.dta13("data/dta_102.dta") unlink("data", recursive = TRUE) test_that("convert.dates TRUE", { # Check that rownames are identical expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) expect_true(datacompare(dd, dd106)) expect_true(datacompare(dd, dd105)) expect_true(datacompare(dd, dd104)) expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) }) # rm(list = files) #### strl save #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = 
TRUE) dir.create("data") # strLs can be of length any length up to 2 billion characters. Starting with # 2046 a string is handled as a strL dd <- data.frame( dat = c(paste(replicate(2046, "a"), collapse = ""), paste(replicate(2046, "b"), collapse = "")), stringsAsFactors = FALSE) save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) # save.dta13(dd, "data/dta_115.dta", version = 115) # no strl # save.dta13(dd, "data/dta_114.dta", version = 114) # save.dta13(dd, "data/dta_113.dta", version = 113) # save.dta13(dd, "data/dta_112.dta", version = 112) # save.dta13(dd, "data/dta_111.dta", version = 111) # save.dta13(dd, "data/dta_110.dta", version = 110) # save.dta13(dd, "data/dta_108.dta", version = 108) # save.dta13(dd, "data/dta_107.dta", version = 107) # save.dta13(dd, "data/dta_106.dta", version = 106) # save.dta13(dd, "data/dta_105.dta", version = 105) # save.dta13(dd, "data/dta_104.dta", version = 104) # save.dta13(dd, "data/dta_103.dta", version = 103) # save.dta13(dd, "data/dta_102.dta", version = 102) dd121 <- read.dta13("data/dta_121.dta", replace.strl = TRUE) dd120 <- read.dta13("data/dta_120.dta", replace.strl = TRUE) dd119 <- read.dta13("data/dta_119.dta", replace.strl = TRUE) dd118 <- read.dta13("data/dta_118.dta", replace.strl = TRUE) dd117 <- read.dta13("data/dta_117.dta", replace.strl = TRUE) # dd115 <- read.dta13("data/dta_115.dta") # dd114 <- read.dta13("data/dta_114.dta") # dd113 <- read.dta13("data/dta_113.dta") # dd112 <- read.dta13("data/dta_112.dta") # dd111 <- read.dta13("data/dta_111.dta") # dd110 <- read.dta13("data/dta_110.dta") # dd108 <- read.dta13("data/dta_108.dta") # dd107 <- read.dta13("data/dta_107.dta") # dd106 <- read.dta13("data/dta_106.dta") # dd105 <- read.dta13("data/dta_105.dta") # dd104 <- read.dta13("data/dta_104.dta") # dd103 <- 
read.dta13("data/dta_103.dta") # dd102 <- read.dta13("data/dta_102.dta") unlink("data", recursive = TRUE) test_that("replace.strl TRUE", { # Check that rownames are identical expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) # expect_true(datacompare(dd, dd115)) # expect_true(datacompare(dd, dd114)) # expect_true(datacompare(dd, dd113)) # expect_true(datacompare(dd, dd112)) # expect_true(datacompare(dd, dd111)) # expect_true(datacompare(dd, dd110)) # expect_true(datacompare(dd, dd108)) # expect_true(datacompare(dd, dd107)) # expect_true(datacompare(dd, dd106)) # expect_true(datacompare(dd, dd105)) # expect_true(datacompare(dd, dd104)) # expect_true(datacompare(dd, dd103)) # expect_true(datacompare(dd, dd102)) }) # rm(list = files) #### convert.underscore save #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- data.frame(x.1 = 1) save.dta13(dd, "data/dta_121.dta", version = 121, convert.underscore = TRUE) save.dta13(dd, "data/dta_120.dta", version = 120, convert.underscore = TRUE) save.dta13(dd, "data/dta_119.dta", version = 119, convert.underscore = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, convert.underscore = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, convert.underscore = TRUE) save.dta13(dd, "data/dta_115.dta", version = 115, convert.underscore = TRUE) save.dta13(dd, "data/dta_114.dta", version = 114, convert.underscore = TRUE) save.dta13(dd, "data/dta_113.dta", version = 113, convert.underscore = TRUE) save.dta13(dd, "data/dta_112.dta", version = 112, convert.underscore = TRUE) save.dta13(dd, "data/dta_111.dta", version = 111, convert.underscore = TRUE) save.dta13(dd, "data/dta_110.dta", version = 110, convert.underscore = TRUE) save.dta13(dd, "data/dta_108.dta", version = 108, convert.underscore = TRUE) save.dta13(dd, "data/dta_107.dta", version = 107, 
convert.underscore = TRUE) save.dta13(dd, "data/dta_106.dta", version = 106, convert.underscore = TRUE) save.dta13(dd, "data/dta_105.dta", version = 105, convert.underscore = TRUE) save.dta13(dd, "data/dta_104.dta", version = 104, convert.underscore = TRUE) save.dta13(dd, "data/dta_103.dta", version = 103, convert.underscore = TRUE) save.dta13(dd, "data/dta_102.dta", version = 102, convert.underscore = TRUE) dd121 <- read.dta13("data/dta_121.dta") dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") dd115 <- read.dta13("data/dta_115.dta") dd114 <- read.dta13("data/dta_114.dta") dd113 <- read.dta13("data/dta_113.dta") dd112 <- read.dta13("data/dta_112.dta") dd111 <- read.dta13("data/dta_111.dta") dd110 <- read.dta13("data/dta_110.dta") dd108 <- read.dta13("data/dta_108.dta") dd107 <- read.dta13("data/dta_107.dta") dd106 <- read.dta13("data/dta_106.dta") dd105 <- read.dta13("data/dta_105.dta") dd104 <- read.dta13("data/dta_104.dta") dd103 <- read.dta13("data/dta_103.dta") dd102 <- read.dta13("data/dta_102.dta") unlink("data", recursive = TRUE) names(dd) <- "x_1" test_that("convert.underscore TRUE", { # check numerics expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) expect_true(datacompare(dd, dd106)) expect_true(datacompare(dd, dd105)) expect_true(datacompare(dd, dd104)) expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) # check names expect_true(namescompare(dd, dd121)) expect_true(namescompare(dd, dd120)) 
expect_true(namescompare(dd, dd119)) expect_true(namescompare(dd, dd118)) expect_true(namescompare(dd, dd117)) expect_true(namescompare(dd, dd115)) expect_true(namescompare(dd, dd114)) expect_true(namescompare(dd, dd113)) expect_true(namescompare(dd, dd112)) expect_true(namescompare(dd, dd111)) expect_true(namescompare(dd, dd110)) expect_true(namescompare(dd, dd108)) expect_true(namescompare(dd, dd107)) expect_true(namescompare(dd, dd106)) expect_true(namescompare(dd, dd105)) expect_true(namescompare(dd, dd104)) expect_true(namescompare(dd, dd103)) expect_true(namescompare(dd, dd102)) }) # rm(list = files) #### select.rows #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- mtcars save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) save.dta13(dd, "data/dta_115.dta", version = 115) save.dta13(dd, "data/dta_114.dta", version = 114) save.dta13(dd, "data/dta_113.dta", version = 113) save.dta13(dd, "data/dta_112.dta", version = 112) save.dta13(dd, "data/dta_111.dta", version = 111) save.dta13(dd, "data/dta_110.dta", version = 110) save.dta13(dd, "data/dta_108.dta", version = 108) save.dta13(dd, "data/dta_107.dta", version = 107) save.dta13(dd, "data/dta_106.dta", version = 106) save.dta13(dd, "data/dta_105.dta", version = 105) save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) dd121 <- read.dta13("data/dta_121.dta", select.rows = 5) dd120 <- read.dta13("data/dta_120.dta", select.rows = 5) dd119 <- read.dta13("data/dta_119.dta", select.rows = 5) dd118 <- read.dta13("data/dta_118.dta", select.rows = 5) dd117 <- read.dta13("data/dta_117.dta", select.rows = 5) dd115 <- read.dta13("data/dta_115.dta", select.rows = 5) dd114 <- 
read.dta13("data/dta_114.dta", select.rows = 5) dd113 <- read.dta13("data/dta_113.dta", select.rows = 5) dd112 <- read.dta13("data/dta_112.dta", select.rows = 5) dd111 <- read.dta13("data/dta_111.dta", select.rows = 5) dd110 <- read.dta13("data/dta_110.dta", select.rows = 5) dd108 <- read.dta13("data/dta_108.dta", select.rows = 5) dd107 <- read.dta13("data/dta_107.dta", select.rows = 5) dd106 <- read.dta13("data/dta_106.dta", select.rows = 5) dd105 <- read.dta13("data/dta_105.dta", select.rows = 5) dd104 <- read.dta13("data/dta_104.dta", select.rows = 5) dd103 <- read.dta13("data/dta_103.dta", select.rows = 5) dd102 <- read.dta13("data/dta_102.dta", select.rows = 5) unlink("data", recursive = TRUE) dd <- dd[1:5,] test_that("select.rows = 5", { # check numerics expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) expect_true(datacompare(dd, dd106)) expect_true(datacompare(dd, dd105)) expect_true(datacompare(dd, dd104)) expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) }) if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- mtcars save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) save.dta13(dd, "data/dta_115.dta", version = 115) save.dta13(dd, "data/dta_114.dta", version = 114) save.dta13(dd, "data/dta_113.dta", version = 113) save.dta13(dd, "data/dta_112.dta", version = 112) save.dta13(dd, 
"data/dta_111.dta", version = 111) save.dta13(dd, "data/dta_110.dta", version = 110) save.dta13(dd, "data/dta_108.dta", version = 108) save.dta13(dd, "data/dta_107.dta", version = 107) save.dta13(dd, "data/dta_106.dta", version = 106) save.dta13(dd, "data/dta_105.dta", version = 105) save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) dd121 <- read.dta13("data/dta_121.dta", select.rows = c(5,10)) dd120 <- read.dta13("data/dta_120.dta", select.rows = c(5,10)) dd119 <- read.dta13("data/dta_119.dta", select.rows = c(5,10)) dd118 <- read.dta13("data/dta_118.dta", select.rows = c(5,10)) dd117 <- read.dta13("data/dta_117.dta", select.rows = c(5,10)) dd115 <- read.dta13("data/dta_115.dta", select.rows = c(5,10)) dd114 <- read.dta13("data/dta_114.dta", select.rows = c(5,10)) dd113 <- read.dta13("data/dta_113.dta", select.rows = c(5,10)) dd112 <- read.dta13("data/dta_112.dta", select.rows = c(5,10)) dd111 <- read.dta13("data/dta_111.dta", select.rows = c(5,10)) dd110 <- read.dta13("data/dta_110.dta", select.rows = c(5,10)) dd108 <- read.dta13("data/dta_108.dta", select.rows = c(5,10)) dd107 <- read.dta13("data/dta_107.dta", select.rows = c(5,10)) dd106 <- read.dta13("data/dta_106.dta", select.rows = c(5,10)) dd105 <- read.dta13("data/dta_105.dta", select.rows = c(5,10)) dd104 <- read.dta13("data/dta_104.dta", select.rows = c(5,10)) dd103 <- read.dta13("data/dta_103.dta", select.rows = c(5,10)) dd102 <- read.dta13("data/dta_102.dta", select.rows = c(5,10)) unlink("data", recursive = TRUE) dd <- dd[5:10,] test_that("select.rows = c(5,10)", { # check numerics expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) 
expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) expect_true(datacompare(dd, dd106)) expect_true(datacompare(dd, dd105)) expect_true(datacompare(dd, dd104)) expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) }) # rm(list = files) #### select.cols #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- mtcars save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) save.dta13(dd, "data/dta_115.dta", version = 115) save.dta13(dd, "data/dta_114.dta", version = 114) save.dta13(dd, "data/dta_113.dta", version = 113) save.dta13(dd, "data/dta_112.dta", version = 112) save.dta13(dd, "data/dta_111.dta", version = 111) save.dta13(dd, "data/dta_110.dta", version = 110) save.dta13(dd, "data/dta_108.dta", version = 108) save.dta13(dd, "data/dta_107.dta", version = 107) save.dta13(dd, "data/dta_106.dta", version = 106) save.dta13(dd, "data/dta_105.dta", version = 105) save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) dd121 <- read.dta13("data/dta_121.dta", select.cols = c("disp", "drat")) dd120 <- read.dta13("data/dta_120.dta", select.cols = c("disp", "drat")) dd119 <- read.dta13("data/dta_119.dta", select.cols = c("disp", "drat")) dd118 <- read.dta13("data/dta_118.dta", select.cols = c("disp", "drat")) dd117 <- read.dta13("data/dta_117.dta", select.cols = c("disp", "drat")) dd115 <- read.dta13("data/dta_115.dta", select.cols = c("disp", "drat")) dd114 <- read.dta13("data/dta_114.dta", select.cols = c("disp", "drat")) dd113 <- read.dta13("data/dta_113.dta", select.cols = c("disp", "drat")) dd112 <- read.dta13("data/dta_112.dta", 
select.cols = c("disp", "drat")) dd111 <- read.dta13("data/dta_111.dta", select.cols = c("disp", "drat")) dd110 <- read.dta13("data/dta_110.dta", select.cols = c("disp", "drat")) dd108 <- read.dta13("data/dta_108.dta", select.cols = c("disp", "drat")) dd107 <- read.dta13("data/dta_107.dta", select.cols = c("disp", "drat")) dd106 <- read.dta13("data/dta_106.dta", select.cols = c("disp", "drat")) dd105 <- read.dta13("data/dta_105.dta", select.cols = c("disp", "drat")) dd104 <- read.dta13("data/dta_104.dta", select.cols = c("disp", "drat")) dd103 <- read.dta13("data/dta_103.dta", select.cols = c("disp", "drat")) dd102 <- read.dta13("data/dta_102.dta", select.cols = c("disp", "drat")) unlink("data", recursive = TRUE) dd <- dd[,c("disp", "drat")] test_that("select.cols = c('disp', 'drat')", { # check numerics expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) expect_true(datacompare(dd, dd106)) expect_true(datacompare(dd, dd105)) expect_true(datacompare(dd, dd104)) expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) }) if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- mtcars save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) save.dta13(dd, "data/dta_115.dta", version = 115) save.dta13(dd, "data/dta_114.dta", version = 114) save.dta13(dd, "data/dta_113.dta", version = 113) save.dta13(dd, 
"data/dta_112.dta", version = 112) save.dta13(dd, "data/dta_111.dta", version = 111) save.dta13(dd, "data/dta_110.dta", version = 110) save.dta13(dd, "data/dta_108.dta", version = 108) save.dta13(dd, "data/dta_107.dta", version = 107) save.dta13(dd, "data/dta_106.dta", version = 106) save.dta13(dd, "data/dta_105.dta", version = 105) save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) dd121 <- read.dta13("data/dta_121.dta", select.cols = c(3, 5)) dd120 <- read.dta13("data/dta_120.dta", select.cols = c(3, 5)) dd119 <- read.dta13("data/dta_119.dta", select.cols = c(3, 5)) dd118 <- read.dta13("data/dta_118.dta", select.cols = c(3, 5)) dd117 <- read.dta13("data/dta_117.dta", select.cols = c(3, 5)) dd115 <- read.dta13("data/dta_115.dta", select.cols = c(3, 5)) dd114 <- read.dta13("data/dta_114.dta", select.cols = c(3, 5)) dd113 <- read.dta13("data/dta_113.dta", select.cols = c(3, 5)) dd112 <- read.dta13("data/dta_112.dta", select.cols = c(3, 5)) dd111 <- read.dta13("data/dta_111.dta", select.cols = c(3, 5)) dd110 <- read.dta13("data/dta_110.dta", select.cols = c(3, 5)) dd108 <- read.dta13("data/dta_108.dta", select.cols = c(3, 5)) dd107 <- read.dta13("data/dta_107.dta", select.cols = c(3, 5)) dd106 <- read.dta13("data/dta_106.dta", select.cols = c(3, 5)) dd105 <- read.dta13("data/dta_105.dta", select.cols = c(3, 5)) dd104 <- read.dta13("data/dta_104.dta", select.cols = c(3, 5)) dd103 <- read.dta13("data/dta_103.dta", select.cols = c(3, 5)) dd102 <- read.dta13("data/dta_102.dta", select.cols = c(3, 5)) unlink("data", recursive = TRUE) dd <- dd[,c("disp", "drat")] test_that("select.cols = c('disp', 'drat')", { # check numerics expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) expect_true(datacompare(dd, dd115)) expect_true(datacompare(dd, dd114)) 
expect_true(datacompare(dd, dd113)) expect_true(datacompare(dd, dd112)) expect_true(datacompare(dd, dd111)) expect_true(datacompare(dd, dd110)) expect_true(datacompare(dd, dd108)) expect_true(datacompare(dd, dd107)) expect_true(datacompare(dd, dd106)) expect_true(datacompare(dd, dd105)) expect_true(datacompare(dd, dd104)) expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) }) # rm(list = files) #### expansion.fields #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- mtcars # create expansion.fields: In stata use command notes: They are constructed as # follows: # # 1. on what is the note : can be _dta or a variable name # 2. string "note" + number of note # 3. the note # initializiation of a one line note on a dta-file is done using: Ordering does # not matter: # # line1: _dta note0 1 # # line2: _dta note1 a note attached to the dta ef <- list( c("_dta", "note1", "note written in R"), c("_dta", "note0", "1"), c("mpg", "note1", "Miles/(US) gallon"), c("mpg", "note0", "1") ) attr(dd, "expansion.fields") <- ef save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) save.dta13(dd, "data/dta_115.dta", version = 115) save.dta13(dd, "data/dta_114.dta", version = 114) save.dta13(dd, "data/dta_113.dta", version = 113) save.dta13(dd, "data/dta_112.dta", version = 112) save.dta13(dd, "data/dta_111.dta", version = 111) save.dta13(dd, "data/dta_110.dta", version = 110) save.dta13(dd, "data/dta_108.dta", version = 108) save.dta13(dd, "data/dta_107.dta", version = 107) save.dta13(dd, "data/dta_106.dta", version = 106) save.dta13(dd, "data/dta_105.dta", version = 105) # save.dta13(dd, "data/dta_104.dta", version = 104) # save.dta13(dd, "data/dta_103.dta", version = 103) # save.dta13(dd, "data/dta_102.dta", version = 
102) dd121 <- attr(read.dta13("data/dta_121.dta"), "expansion.fields") dd120 <- attr(read.dta13("data/dta_120.dta"), "expansion.fields") dd119 <- attr(read.dta13("data/dta_119.dta"), "expansion.fields") dd118 <- attr(read.dta13("data/dta_118.dta"), "expansion.fields") dd117 <- attr(read.dta13("data/dta_117.dta"), "expansion.fields") dd115 <- attr(read.dta13("data/dta_115.dta"), "expansion.fields") dd114 <- attr(read.dta13("data/dta_114.dta"), "expansion.fields") dd113 <- attr(read.dta13("data/dta_113.dta"), "expansion.fields") dd112 <- attr(read.dta13("data/dta_112.dta"), "expansion.fields") dd111 <- attr(read.dta13("data/dta_111.dta"), "expansion.fields") dd110 <- attr(read.dta13("data/dta_110.dta"), "expansion.fields") dd108 <- attr(read.dta13("data/dta_108.dta"), "expansion.fields") dd107 <- attr(read.dta13("data/dta_107.dta"), "expansion.fields") dd106 <- attr(read.dta13("data/dta_106.dta"), "expansion.fields") dd105 <- attr(read.dta13("data/dta_105.dta"), "expansion.fields") # dd104 <- read.dta13("data/dta_104.dta") # dd103 <- read.dta13("data/dta_103.dta") # dd102 <- read.dta13("data/dta_102.dta") unlink("data", recursive = TRUE) test_that("expansinon.fields", { # check numerics expect_equal(ef, dd121) expect_equal(ef, dd120) expect_equal(ef, dd119) expect_equal(ef, dd118) expect_equal(ef, dd117) expect_equal(ef, dd115) expect_equal(ef, dd114) expect_equal(ef, dd113) expect_equal(ef, dd112) expect_equal(ef, dd111) expect_equal(ef, dd110) expect_equal(ef, dd108) expect_equal(ef, dd107) expect_equal(ef, dd106) expect_equal(ef, dd105) # expect_equal(ef, dd104) # expect_equal(ef, dd103) # expect_equal(ef, dd102) }) #### save and read varlabels #### if (readstata13:::dir.exists13("data")) { unlink("data", recursive = TRUE) } dir.create("data") dd <- mtcars varlabeldd <- LETTERS[seq_len(ncol(dd))] varlabel(dd) <- varlabeldd version_list <- c(102,103,104,105,106,107,108,110,111, 112,113,114,115,117,118,119,120,121) # write variable label attribute for(v in 
version_list) { save.dta13(dd, paste0("data/dta_", v, ".dta"), version = v) } # read variable label attribute varlabeldd_read <- lapply(version_list, function(v) { attr(read.dta13(paste0("data/dta_", v, ".dta")), "var.labels") }) names(varlabeldd_read) <- as.character(version_list) unlink("data", recursive = TRUE) test_that("save and read varlabels", { for(v in as.character(version_list)) { expect_equal(varlabeldd, varlabeldd_read[[v]]) } }) #### differentiating "NA" and NA_character works #### if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") dd <- data.frame(x1 = c("NA", NA_character_)) exp <- data.frame(x1 = c("NA", "")) save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) save.dta13(dd, "data/dta_115.dta", version = 115) save.dta13(dd, "data/dta_114.dta", version = 114) save.dta13(dd, "data/dta_113.dta", version = 113) save.dta13(dd, "data/dta_112.dta", version = 112) save.dta13(dd, "data/dta_111.dta", version = 111) save.dta13(dd, "data/dta_110.dta", version = 110) save.dta13(dd, "data/dta_108.dta", version = 108) save.dta13(dd, "data/dta_107.dta", version = 107) save.dta13(dd, "data/dta_106.dta", version = 106) save.dta13(dd, "data/dta_105.dta", version = 105) save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) dd121 <- read.dta13("data/dta_121.dta") dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") dd115 <- read.dta13("data/dta_115.dta") dd114 <- read.dta13("data/dta_114.dta") dd113 <- read.dta13("data/dta_113.dta") dd112 <- read.dta13("data/dta_112.dta") dd111 <- read.dta13("data/dta_111.dta") dd110 <- 
read.dta13("data/dta_110.dta") dd108 <- read.dta13("data/dta_108.dta") dd107 <- read.dta13("data/dta_107.dta") dd106 <- read.dta13("data/dta_106.dta") dd105 <- read.dta13("data/dta_105.dta") dd104 <- read.dta13("data/dta_104.dta") dd103 <- read.dta13("data/dta_103.dta") dd102 <- read.dta13("data/dta_102.dta") test_that("NA character works", { expect_true(datacompare(exp, dd121)) expect_true(datacompare(exp, dd120)) expect_true(datacompare(exp, dd119)) expect_true(datacompare(exp, dd118)) expect_true(datacompare(exp, dd117)) expect_true(datacompare(exp, dd115)) expect_true(datacompare(exp, dd114)) expect_true(datacompare(exp, dd113)) expect_true(datacompare(exp, dd112)) expect_true(datacompare(exp, dd111)) expect_true(datacompare(exp, dd110)) expect_true(datacompare(exp, dd108)) expect_true(datacompare(exp, dd107)) expect_true(datacompare(exp, dd106)) expect_true(datacompare(exp, dd105)) expect_true(datacompare(exp, dd104)) expect_true(datacompare(exp, dd103)) expect_true(datacompare(exp, dd102)) }) # the same with strls if (readstata13:::dir.exists13("data")) unlink("data", recursive = TRUE) dir.create("data") # strLs can be of length any length up to 2 billion characters. 
Starting with # 2046 a string is handled as a strL dd <- data.frame( dat = c(paste(replicate(2046, "a"), collapse = ""), paste(replicate(2046, "b"), collapse = ""), "NA", NA_character_), stringsAsFactors = FALSE) save.dta13(dd, "data/dta_121.dta", version = 121) save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) dd121 <- read.dta13("data/dta_121.dta") dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") test_that("NA character works", { expect_true(datacompare(dd, dd121)) expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) }) readstata13/tests/testthat.R0000644000176200001440000000010214372711643015514 0ustar liggesuserslibrary(testthat) library(readstata13) test_check("readstata13") readstata13/MD50000644000176200001440000001027415002667742012713 0ustar liggesusersd191a70bb57d507f90fa999a3c6797df *DESCRIPTION e8c1458438ead3c34974bc0be3a03ed6 *LICENSE 0963934f8e8bf919d949e368a28d921f *NAMESPACE 05b4109a39bb1ae2c496014182e5994b *NEWS 4a10df0cc20643f8c32768c341105338 *NEWS.md e3f4d7ff793da3436f1a3a6cd798aa98 *R/RcppExports.R 4af4dbe0e04dba572bda1540448ca937 *R/convert.R babf6247e6872fe51f6ade47997be646 *R/dbcal.R 81dda0484779c044d0c5b531297ccb70 *R/read.R b33a8fdb5905b1e77f7cd5d8895266ae *R/readstata13.R 051a89b081c70b919ed158fb5584ef30 *R/save.R eff1901115994c76c7ffe848f44d4390 *R/tools.R 5108751c603f8713e2dc271155615b21 *README.md 756db882c3e616f458e953a9c4915f10 *build/vignette.rds 1c3df270ba4036845055a186c286c477 *inst/doc/readstata13_basic_manual.R cdaab43f22c7fc7b3da387a9df21f703 *inst/doc/readstata13_basic_manual.Rmd 7e42e00161352f84c2d3d41515eb126c *inst/doc/readstata13_basic_manual.html 
df1d0cf8d5ec6e6a2c6ace8114d7544c *inst/extdata/datetime.do f8f52bd111449bb5310fb0cbd728926e *inst/extdata/datetime.dta a885e4f610350825892c92d3ca858889 *inst/extdata/encode.do 1165031bfee6c9e6ce501baa24e3a7f1 *inst/extdata/encode.dta 23c478f4b7d45b7aabcc48a0f5795480 *inst/extdata/encodecp.dta b9463f13d2e57b2d0ee028368eefcd29 *inst/extdata/gen_fac.do 1530f9cdf1f80c39158ea8d249e19af0 *inst/extdata/gen_fac.dta d6127dcadbd1316ee9dafd18420f01b1 *inst/extdata/missings.do dcd880aca64cc264c0ba20ee9b8d1510 *inst/extdata/missings.dta d66c8a83373c17ab2098ca07b975a97e *inst/extdata/missings_lsf.dta 36d795506440d058f7506aa0a7b70989 *inst/extdata/missings_msf.dta 2f24378972ab2a3897f05625e820f764 *inst/extdata/myproject2.dtas 8204563fbdff2e7ee74951eb894c6154 *inst/extdata/nonint.do ed8842275b4ba33858fe0822ff3f178e *inst/extdata/nonint.dta 295396a1a55b4326d89d2c2a86e90441 *inst/extdata/sp500.stbcal 389e33d907d10ec8efe41250f99221ab *inst/extdata/statacar.do f899f302225e099f83de7ac42f0623f2 *inst/extdata/statacar.dta 1e29776eed16f780a9beee2d11ada4d4 *inst/extdata/underscore.do 18d63a094394dd93f3b4363fcd09f322 *inst/extdata/underscore.dta be3bdd7d0414f9b7b9770645b944320a *inst/include/read_data.h 06760831fbebb52dfd74f83fd01968fd *inst/include/read_dta.h c007a8b3bf0a6c4a5562edc4d12af3dc *inst/include/read_pre13_dta.h 8b4125bc9baadc0407d1b131b0f266a7 *inst/include/readstata.h bb287b064b0e4a61d5a7396e99c29630 *inst/include/statadefines.h fc806a4ead84a5b3c6bb4f00af91ebf3 *inst/include/swap_endian.h 3e936e81cffb62a119785e96d210b1e9 *man/as.caldays.Rd bb5c9612d862dd59aa8b9ed538e5789a *man/get.frames.Rd 0db337c2d06483d1cc9417c75903b4f5 *man/get.label.Rd 8dff90ecaf79055181b6666d45621b25 *man/get.label.name.Rd 1aeb1e5335f4e76bbe4b046a578a2b80 *man/get.label.tables.Rd 5a4700ab8b6e29b9ad1fd134a6c62977 *man/get.lang.Rd 3b2bb969adb3f8a26d5741cf467d470b *man/get.origin.codes.Rd f3c2ac88ad9ea19659f1d7c35f3d0ac9 *man/maxchar.Rd a6a14fa7e9120d4b735fc9973864d116 *man/read.dta13.Rd 
9fed7d560541216f3e7eff7a9ea8f00a *man/read.dtas.Rd 71f1e3ccae8375b9365488ba436eb934 *man/readstata13.Rd 417f416418e173fba191b6bd7dc821f6 *man/save.dta13.Rd f403ecad1a2ea32a3ffd1af54e026cb4 *man/saveToExport.Rd 9dd790746cc83f755b65139c745e9c93 *man/set.label.Rd 67e025e2c70d6e96d54703a7b6654663 *man/set.lang.Rd 086d928578359d5c3b6fb0495451eb16 *man/stbcal.Rd 69dd3e9f18ec9f2187d2152c8b830d86 *man/varlabel.Rd 3a3c839566f06a1eb4cb3c960b7cab62 *src/Makevars 6cff4b1cc9d29ba49fdda13bf94571dc *src/RcppExports.cpp b02f2eb90919eccbf30ba53ff56309a6 *src/read.cpp 5a198eb8833d788a96e5ae83cc2c3d35 *src/read_data.cpp 6b9d779697d802f2d191fbda041ccc9b *src/read_dta.cpp 60564659b36a9176453b6efe64d97776 *src/read_pre13_dta.cpp 4db57902fd6ef5db3ef245cb8e5c5d61 *src/save_dta.cpp 88038551f36d81ec814da366bfd4664f *src/save_pre13_dta.cpp 4dd91c288ce11a342d68442481e65e8b *tests/testthat.R 0377e2e97a3e22a2560fe05f0d3eb075 *tests/testthat/data/dta_117.dta ecd3cc99bf13c7f04ec2325511b1ba4e *tests/testthat/data/dta_118.dta 9007adb1d21f92ebc667db86f7f34885 *tests/testthat/data/dta_119.dta ce6ef64a308cd08f164e1298e854b247 *tests/testthat/data/dta_120.dta 265b37f5c63531ed2ddbc70d26b1c35f *tests/testthat/data/dta_121.dta f86a0c13503ae404d4068c6a30e44346 *tests/testthat/test_read.R 9e4de6415ddba329c06e48445321a856 *tests/testthat/test_save.R cdaab43f22c7fc7b3da387a9df21f703 *vignettes/readstata13_basic_manual.Rmd 6555a9a809a513238b3f244e2989f1a1 *vignettes/stata_strl.dta readstata13/R/0000755000176200001440000000000015002660515012567 5ustar liggesusersreadstata13/R/save.R0000644000176200001440000003337615002626101013655 0ustar liggesusers# # Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. 
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

#' Write Stata Binary Files
#'
#' \code{save.dta13} writes a Stata dta-file bytewise and saves the data
#' into a dta-file.
#'
#' @param file \emph{character.} Path to the dta file you want to export.
#' @param data \emph{data.frame.} A data.frame Object.
#' @param data.label \emph{character.} Name of the dta-file.
#' @param time.stamp \emph{logical.} If \code{TRUE}, add a time.stamp to the
#'  dta-file.
#' @param convert.factors \emph{logical.} If \code{TRUE}, factors will be
#'  converted to Stata variables with labels.
#'  Stata expects strings to be encoded as Windows-1252, so all levels will be
#'  recoded. Character which can not be mapped in Windows-1252 will be saved as
#'  hexcode.
#' @param convert.dates \emph{logical.} If \code{TRUE}, dates will be converted
#'  to Stata date time format. Code from \code{foreign::write.dta}
#' @param convert.underscore \emph{logical.} If \code{TRUE}, all non numerics or
#'  non alphabet characters will be converted to underscores.
#' @param tz \emph{character.} time zone specification to be used for
#'  POSIXct values and dates (if convert.dates is TRUE). ‘""’ is the current
#'  time zone, and ‘"GMT"’ is UTC (Universal Time, Coordinated).
#' @param add.rownames \emph{logical.} If \code{TRUE}, a new variable rownames
#'  will be added to the dta-file.
#' @param compress \emph{logical.} If \code{TRUE}, the resulting dta-file will
#'  use all of Stata's numeric-vartypes: integer columns and factors are stored
#'  as the smallest of byte, int or long that can hold their values.
#' @param version \emph{numeric.} Stata format for the resulting dta-file either
#'  Stata version number (6 - 16) or the internal Stata dta-format (e.g. 117 for
#'  Stata 13). Support for large datasets: Use version="15mp" to
#'  save the dataset in the new Stata 15/16 MP file format. This feature is not
#'  thoroughly tested yet.
#' @return The function writes a dta-file to disk. The following features of the
#'  dta file format are supported:
#' \describe{
#'   \item{datalabel:}{Dataset label}
#'   \item{time.stamp:}{Timestamp of file creation}
#'   \item{formats:}{Stata display formats. May be used with
#'   \code{\link[base]{sprintf}}}
#'   \item{type:}{Stata data type (see Stata Corp 2014)}
#'   \item{var.labels:}{Variable labels}
#'   \item{version:}{dta file format version}
#'   \item{strl:}{List of character vectors for the new strL string variable
#'    type. The first element is the identifier and the second element the
#'    string.}
#' }
#' @seealso \code{\link[foreign]{read.dta}} in package \code{foreign} and
#'  \code{memisc} for dta files from Stata versions < 13 and \code{read_dta} in
#'  package \code{haven} for Stata version >= 13.
#' @references Stata Corp (2014): Description of .dta file format
#'  \url{https://www.stata.com/help.cgi?dta}
#' @examples
#' \dontrun{
#'   library(readstata13)
#'   save.dta13(cars, file="cars.dta")
#' }
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @useDynLib readstata13, .registration = TRUE
#' @export
save.dta13 <- function(data, file, data.label=NULL, time.stamp=TRUE,
                       convert.factors=TRUE, convert.dates=TRUE, tz="GMT",
                       add.rownames=FALSE, compress=FALSE, version=117,
                       convert.underscore=FALSE){

  if (!is.data.frame(data))
    stop("The object \"data\" must have class data.frame")
  if (!dir.exists13(dirname(file)))
    stop("Path is invalid. Possibly a non-existing directory.")

  # Allow writing version as Stata version not Stata format
  if (version=="15mp" || version=="16mp")
    version <- 119
  if (version==15L || version==16L)
    version <- 118
  if (version==14L)
    version <- 118
  if (version==13L)
    version <- 117
  if (version==12L)
    version <- 115
  if (version==11L || version==10L)
    version <- 114
  if (version==9L || version==8L)
    version <- 113
  if (version==7)
    version <- 110
  if (version==6)
    version <- 108

  if (version<102 || version == 109 || version == 116 || version>121)
    stop("Version mismatch abort execution. No data was saved.")

  # Type codes of the dta format (>= 117). sstr is the longest fixed-width
  # string; longer strings get the type code sstrl.
  sstr    <- 2045
  sstrl   <- 32768
  sdouble <- 65526
  sfloat  <- 65527
  slong   <- 65528
  sint    <- 65529
  sbyte   <- 65530

  # Type codes of the older formats
  if (version < 117) {
    sstr    <- 244
    sstrl   <- 244
    sdouble <- 255
    sfloat  <- 254
    slong   <- 253
    sint    <- 252
    sbyte   <- 251
  }

  if (version<111 || version==112)
    sstrl <- 80

  is_utf8 <- l10n_info()[["UTF-8"]]

  # Is recoding necessary?
  if (version<=117) {
    # Reencoding is always needed
    doRecode <- TRUE
    toEncoding <- "CP1252"
  } else if (!is_utf8) {
    # If R runs in a non UTF-8 locale and Stata > 13
    doRecode <- TRUE
    toEncoding <- "UTF-8"
  } else {
    # utf-8 and Stata > 13
    doRecode <- FALSE
  }

  if (add.rownames) {
    if (doRecode) {
      rwn <- save.encoding(rownames(data), toEncoding)
    } else {
      rwn <- rownames(data)
    }

    data <- data.frame(rownames = rwn,
                       data, stringsAsFactors = FALSE)
  }
  rownames(data) <- NULL

  if (convert.underscore) {
    names(data) <- gsub("[^a-zA-Z0-9_]", "_", names(data))
    names(data)[grepl("^[0-9]", names(data))] <- paste0(
      "_", names(data)[grepl("^[0-9]", names(data))])
  }

  filepath <- path.expand(file)

  # For now we handle numeric and integers
  # NOTE: sapply() is kept on purpose for the class lookups; columns with more
  # than one class turn the result into a list and the code below relies on
  # comparing/assigning into that structure.
  vartypen <- sapply(data, class)
  names(vartypen) <- names(data)

  # Convert logicals to integers
  for (v in names(vartypen[vartypen == "logical"]))
    data[[v]] <- as.integer(data[[v]])
  vartypen <- vtyp <- sapply(data, class)

  # Identify POSIXt
  posix_datetime <- which(vapply(data,
                                 function(x) inherits(x, "POSIXt"),
                                 logical(1)))
  vartypen[posix_datetime] <- vtyp[posix_datetime] <- "POSIXt"

  # Change origin to 1960-01-01
  # times: seconds from 1970-01-01 + 10 years (new origin 1960-01-01) * 1000
  # = milliseconds
  # go back 1h
  for (v in names(vartypen[vartypen == "POSIXt"]))
    data[[v]] <- (as.double(data[[v]]) + 315622800 - 60*60)*1000

  if (convert.factors){
    if (version < 106) {

      hasfactors <- vapply(data, is.factor, logical(1))

      if (any(hasfactors))
        warning(paste("dta-format < 106 can not handle factors.",
                      "Labels are not saved!"))
    }
    # If our data.frame contains factors, we create a label.table
    factors <- which(vapply(data, is.factor, logical(1)))
    f.names <- attr(factors,"names")

    label.table <- vector("list", length(f.names))
    names(label.table) <- f.names

    valLabel <- sapply(data, class)
    valLabel[valLabel != "factor"] <- ""

    i <- 0
    for (v in factors)  {
      i <- i + 1
      if (doRecode) {
        f.levels <- save.encoding(levels(data[[v]]), toEncoding)
      } else {
        f.levels <- levels(data[[v]])
      }
      # integer codes 1..nlevels, named by the (recoded) level text
      f.labels <-  as.integer(labels(levels(data[[v]])))
      attr(f.labels, "names") <- f.levels
      f.labels <- f.labels[names(f.labels) != ".."]
      label.table[[ (f.names[i]) ]] <- f.labels

      valLabel[v] <- f.names[i]
    }
    attr(data, "label.table") <- rev(label.table)
    if (doRecode) {
      valLabel <- sapply(valLabel, save.encoding, toEncoding)
    }
    attr(data, "vallabels") <- valLabel
  } else {
    attr(data, "label.table") <- NULL
    attr(data, "vallabels") <- rep("",length(data))
  }

  if (convert.dates) {
    dates <- which(vapply(data,
                          function(x) inherits(x, "Date"),
                          logical(1)))
    for (v in dates)
      data[[v]] <- as.vector(
        julian(data[[v]],as.Date("1960-1-1", tz = "GMT"))
      )
  }

  # is.numeric is TRUE for integers
  ff <- vapply(data, is.numeric, logical(1))
  ii <- vapply(data, is.integer, logical(1))
  factors <- vapply(data, is.factor, logical(1))
  empty <- vapply(data, function(x) all(is.na(x) & !is.character(x)),
                  logical(1))
  ddates <- vartypen == "Date"

  # default no compression: numeric as double; integer as long; date as date;
  # empty as byte
  if (!compress) {
    vartypen[ff] <- sdouble
    vartypen[ii] <- slong
    vartypen[factors] <- slong
    vartypen[ddates] <- -sdouble
    vartypen[empty] <- sbyte
  } else {
    varTmin <- sapply(data[(ff | ii) & !empty],
                      function(x) min(x,na.rm=TRUE))
    varTmax <- sapply(data[(ff | ii) & !empty],
                      function(x) max(x,na.rm=TRUE))

    # check if numerics can be stored as integers
    numToCompress <- sapply(data[ff], saveToExport)

    if (any(numToCompress)) {
      saveToConvert <- names(data[ff])[numToCompress]
      # replace numerics as integers (lapply keeps one vector per column,
      # independent of the number of rows)
      data[saveToConvert] <- lapply(data[saveToConvert], as.integer)

      # recheck after update
      ff <- vapply(data, is.numeric, logical(1))
      ii <- vapply(data, is.integer, logical(1))
    }

    vartypen[ff] <- sdouble

    # value ranges used for Stata byte and int storage
    bmin <- -127; bmax <- 100
    imin <- -32767; imax <- 32740

    # check if integer is byte, int or long. The bounds are inclusive, so a
    # column whose min/max sits exactly on a limit still gets an integer type.
    for (k in names(which(ii & !empty))) {
      lo <- varTmin[[k]]
      hi <- varTmax[[k]]
      if (lo >= bmin && hi <= bmax) {
        vartypen[k] <- sbyte
      } else if (lo >= imin && hi <= imax) {
        vartypen[k] <- sint
      } else {
        vartypen[k] <- slong
      }
    }

    # Factors are stored as their integer codes 1..nlevels; choose the type
    # per factor from its own number of levels.
    factorlength <- vapply(data[factors & !empty], nlevels, integer(1))
    for (k in names(factorlength)) {
      nlev <- factorlength[[k]]
      if (nlev <= bmax) {
        vartypen[k] <- sbyte
      } else if (nlev <= imax) {
        vartypen[k] <- sint
      } else {
        vartypen[k] <- slong
      }
    }

    # keep dates as is
    vartypen[ddates] <- -sdouble
    # cast empty variables as byte
    vartypen[empty] <- sbyte
  }

  # recode character variables. >118 wants utf-8, so encoding may be required
  if(doRecode) {
    for(v in which(vartypen == "character")) {
      data[[v]] <- save.encoding(data[[v]], toEncoding)
    }
  }

  # str and strL are stored by maximum length of chars in a variable
  str.length <- sapply(data[vartypen == "character"], FUN=maxchar)
  str.length[str.length > sstr] <- sstrl

  # vartypen for character
  for (v in names(vartypen[vartypen == "character"])) {
    vartypen[[v]] <- str.length[[v]]
  }

  # dates carry a negative type code up to here; only the absolute value is
  # stored
  vartypen <- abs(as.integer(vartypen))
  attr(data, "types") <- vartypen

  # ToDo: Add proper check.
  #   # value_label_names must be < 33 chars
  #   if (sapply(valLabel,FUN=maxchar) >= 33)
  #     message ("at least one variable name is to long.")

  # Resize varnames to 32. Stata requires this. It allows storing 32*4 bytes,
  # but can not work with longer variable names. Chars can be 1 - 4 bytes we
  # count the varnames in R. Get nchars and trim them.
  varnames <- names(data)
  lenvarnames <- nchar(varnames)

  maxlen <- 32
  if (version <= 108)
    maxlen <- 8
  if (version >= 118)
    maxlen <- 128

  if (any (lenvarnames > maxlen)) {
    message ("Varname to long. Resizing. Max size is ", maxlen, ".")
    names(data) <- strtrim(varnames, width = maxlen)
  }

  # Stata format "%9,0g" means european format
  formats <- vartypen
  formats[vtyp == "Date"] <- "%td"
  formats[vtyp == "POSIXt"] <- "%tc"
  formats[formats == sdouble] <- "%9.0g"
  formats[formats == sfloat] <- "%9.0g"
  formats[formats == slong] <- "%9.0g"
  formats[formats == sint] <- "%9.0g"
  formats[formats == sbyte] <- "%9.0g"
  formats[vartypen >= 0 & vartypen <= sstr] <-
    paste0("%", formats[vartypen >= 0 & vartypen <= sstr], "s")
  formats[formats == sstrl] <- "%9s"

  attr(data, "formats") <- formats

  # Create a datalabel
  if (is.null(data.label)) {
    attr(data, "datalabel") <- "Written by R"
  } else {
    if (version == 102L)
      warning("Format 102 does not print a data label in Stata.")
    if (doRecode) {
      data.label <- save.encoding(data.label, toEncoding)
    }
    attr(data, "datalabel") <- data.label
  }

  # Create the 17 char long timestamp. It may contain 17 char long strings
  if (!time.stamp) {
    attr(data, "timestamp") <- ""
  } else {
    # format the month name in the C locale, then restore the user's locale
    lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")
    attr(data, "timestamp") <- format(Sys.time(), "%d %b %Y %H:%M")
    Sys.setlocale("LC_TIME",lct)
  }

  expfield <- attr(data, "expansion.fields")
  if (doRecode) {
    expfield <- lapply(expfield, function(x) iconv(x, to=toEncoding))
  }

  attr(data, "expansion.fields") <- rev(expfield)

  attr(data, "version") <- as.character(version)
  if (version < 117)
    attr(data, "version") <- version

  # If length of varlabels differs from ncols drop varlabels. This can happen,
  # when the initial data.frame was read by read.dta13 and another variable was
  # attached. In this case the last variable label has a non existing variable
  # label which will crash our Rcpp code. Since varlabels do not respect the
  # ordering inside the data frame, we simply drop them.

  varlabels <- attr(data, "var.labels")

  if (doRecode) {
    attr(data, "var.labels") <- save.encoding(varlabels, toEncoding)
  }
  if (!is.null(varlabels) && (length(varlabels)!=ncol(data))) {
    attr(data, "var.labels") <- NULL
    warning("Number of variable labels does not match number of variables. Variable labels dropped.")
  }

  if (version >= 117)
    invisible( stata_save(filePath = filepath, dat = data) )
  else
    invisible( stata_pre13_save(filePath = filepath, dat = data) )
}
readstata13/R/RcppExports.R0000644000176200001440000000106415002660515015204 0ustar liggesusers
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

stata_read <- function(filePath, missing, selectrows, selectcols_chr, selectcols_int, strlexport, strlpath) {
    .Call(`_readstata13_stata_read`, filePath, missing, selectrows, selectcols_chr, selectcols_int, strlexport, strlpath)
}

stata_save <- function(filePath, dat) {
    .Call(`_readstata13_stata_save`, filePath, dat)
}

stata_pre13_save <- function(filePath, dat) {
    .Call(`_readstata13_stata_pre13_save`, filePath, dat)
}

readstata13/R/dbcal.R0000644000176200001440000001444115002626101013754 0ustar liggesusers
#
# Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

#' Parse Stata business calendar files
#'
#' Create conversion table for business calendar dates.
#'
#' @param stbcalfile \emph{stbcal-file} Stata business calendar file created by
#' Stata.
#' @return Returns a data.frame with two cols:
#' \describe{
#' \item{range:}{The date matching the businessdate. Date format.}
#' \item{buisdays:}{The Stata business calendar day.
#' Integer format.}
#' }
#' @details Stata 12 introduced business calendar format. Business dates are
#' integer numbers in a certain range of days, weeks, months or years. In this
#' range some days are omitted (e.g. weekends or holidays). If a business
#' calendar was created, a stbcal file matching this calendar was created. This
#' file is required to read the business calendar. This parser reads the stbcal-
#' file and returns a data.frame with dates matching business calendar dates.
#'
#' A dta-file containing Stata business dates imported with read.dta13() shows
#' in formats which stbcal file is required (e.g. "%tbsp500" requires
#' sp500.stbcal).
#'
#' Stata allows adding a short description called purpose. This is added as an
#' attribute of the resulting data.frame.
#'
#' Days before the center date get negative business day numbers (-1 is the
#' last kept day before the center date), the center date is 0 and later days
#' count upwards from 1.
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @examples
#' sp500 <- stbcal(system.file("extdata/sp500.stbcal", package="readstata13"))
#' @importFrom stats complete.cases
#' @export
stbcal <- function(stbcalfile) {

  # Otherwise localised dates will be used. The previous locale is restored on
  # every exit path, including errors.
  lct <- Sys.getlocale("LC_TIME")
  Sys.setlocale("LC_TIME", "C")
  on.exit(Sys.setlocale("LC_TIME", lct), add = TRUE)

  # Parse full file; the file size in bytes is an upper bound for the number
  # of lines. The connection is closed even if reading fails.
  con <- file(stbcalfile, "rb")
  x <- tryCatch(
    readLines(con, file.info(stbcalfile)$size),
    finally = close(con)
  )

  # Dateformat can be ymd, ydm, myd, mdy, dym or dmy. Months are written as
  # abbreviated names (%b). If several entries match, the last one wins.
  formats <- c(ymd = "%Y%b%d", ydm = "%Y%d%b", myd = "%b%Y%d",
               mdy = "%b%d%Y", dym = "%d%Y%b", dmy = "%d%b%Y")
  dateformat <- NULL
  for (key in names(formats)) {
    if (any(grepl(paste("dateformat", key), x))) {
      dateformat <- formats[[key]]
    }
  }
  if (is.null(dateformat)) {
    stop("No valid dateformat entry found in stbcal file.")
  }

  # Range of stbcal. Range is required, contains start and end.
  range_fields <- strsplit(x[grep("range", x)], " ")
  rangestart <- range_fields[[1]][2]
  rangestop <- range_fields[[1]][3]
  range <- seq(from = as.Date(rangestart, dateformat),
               to = as.Date(rangestop, dateformat), "days")

  # Centerdate of stbcal. Date that matches 0.
  centerdate <- x[grep("centerdate", x)]
  centerdate <- gsub("centerdate ", "", centerdate)
  centerdate <- as.Date(centerdate, dateformat)

  # Omit Dayofweek: Mo, Tu, We, Th, Fr, Sa, Su
  omitdayofweek <- x[grep("omit dayofweek", x)]

  daysofweek <- weekdays(as.Date(range))
  stbcal <- data.frame(range = range, daysofweek = daysofweek)

  # Weekdays omitted every week are flagged with NA
  day_names <- c(Mo = "Monday", Tu = "Tuesday", We = "Wednesday",
                 Th = "Thursday", Fr = "Friday", Sa = "Saturday",
                 Su = "Sunday")
  is_omitted <- vapply(
    names(day_names),
    function(abbr) any(grepl(abbr, omitdayofweek)),
    logical(1)
  )
  stbcal$daysofweek[stbcal$daysofweek %in% day_names[is_omitted]] <- NA

  # Special days to be omitted
  if (any(grepl("omit date", x))) {
    omitdates <- x[grep("omit date", x)]
    omitdates <- gsub("omit date ", "", omitdates)
    dates <- as.Date(omitdates, dateformat)
    stbcal$daysofweek[which(stbcal$range %in% dates)] <- NA
  }

  # Keep only wanted days. This has to happen even if the calendar has no
  # "omit date" entries, otherwise omitted weekdays stay in the table.
  stbcal <- stbcal[complete.cases(stbcal$daysofweek), ]

  # In case centerdate is not rangestart: count backwards before the center
  # date and forwards after it.
  before <- stbcal$range < centerdate
  after <- stbcal$range > centerdate
  stbcal$buisdays <- NA
  stbcal$buisdays[stbcal$range == centerdate] <- 0
  stbcal$buisdays[before] <- -rev(seq_len(sum(before)))
  stbcal$buisdays[after] <- seq_len(sum(after))

  # Add purpose
  if (any(grepl("purpose", x))) {
    purpose <- x[grep("purpose", x)]
    attr(stbcal, "purpose") <- purpose
  }

  return(stbcal)
}

#' Convert Stata business calendar dates in readable dates.
#'
#' Convert Stata business calendar dates in readable dates.
#'
#' @param buisdays numeric Vector of business dates
#' @param cal data.frame Conversion table for business calendar dates
#' @param format character String with date format as in \code{\link{as.Date}}
#' @return Returns a vector of readable dates.
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @examples
#' # read business calendar and data
#' sp500 <- stbcal(system.file("extdata/sp500.stbcal", package="readstata13"))
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"))
#'
#' # convert dates and check
#' dat$ldatescal2 <- as.caldays(dat$ldate, sp500)
#' all(dat$ldatescal2==dat$ldatescal)
#' @export
as.caldays <- function(buisdays, cal, format="%Y-%m-%d") {

  # Look the business day numbers up via the row names of the table
  rownames(cal) <- cal$buisdays
  dates <- cal[as.character(buisdays), "range"]

  # Keep the converted value instead of discarding it
  if (!is.null(format)) {
    dates <- as.Date(dates, format = format)
  }

  return(dates)
}
# readstata13/R/convert.R0000644000176200001440000000474615002626101014376 0ustar liggesusers
#
# Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki
# Copyright (C) of 'convert_dt_c' and 'convert_dt_C' Thomas Lumley
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

# Convert millisecond counts relative to 1960-01-01 into POSIXct.
#
# @param x numeric vector of milliseconds
# @param tz time zone passed on to as.POSIXct
convert_dt_c <- function(x, tz) {
  # add 0.1 ms before scaling to seconds to avoid rounding down
  seconds <- (x + 0.1) / 1000
  as.POSIXct(seconds, origin = "1960-01-01", tz = tz)
}

# Same as convert_dt_c, but one second is subtracted for every shifted
# .leap.seconds entry that the value has reached.
#
# @param x numeric vector of milliseconds
# @param tz time zone passed on to as.POSIXct
convert_dt_C <- function(x, tz) {
  leap_marks <- .leap.seconds + seq_along(.leap.seconds) + 315619200
  seconds <- (x + 0.1) / 1000 # avoid rounding down
  leaps_passed <- rowSums(outer(seconds, leap_marks, ">="))
  as.POSIXct(seconds - leaps_passed, origin = "1960-01-01", tz = tz)
}

# Convert Stata format %tm integer to R date.
# Uses the first day of month.
#
# @param x element to be converted
# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
convert_dt_m <- function(x) {
  # twelve months per year, counted from 1960
  years_offset <- x / 12
  year <- 1960 + floor(years_offset)
  month <- x %% 12 + 1
  first_day <- ifelse(is.na(years_offset), NA,
                      paste0(year, "-", month, "-1"))
  as.Date(first_day, "%Y-%m-%d")
}

# Convert Stata format %tq integer to R date.
# Uses the first month and day of quarter.
#
# @param x element to be converted
# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
convert_dt_q <- function(x) {
  # four quarters per year, counted from 1960
  years_offset <- x / 4
  year <- 1960 + floor(years_offset)
  quarter <- x %% 4 + 1
  first_month <- c(1, 4, 7, 10)[quarter]
  first_day <- ifelse(is.na(years_offset), NA,
                      paste0(year, "-", first_month, "-1"))
  as.Date(first_day, "%Y-%m-%d")
}

# Convert Stata format %ty integer to R date
# Uses the first month and day of year.
#
# @param x element to be converted
# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
convert_dt_y <- function(x) {
  # missing years stay NA, everything else becomes January 1st of that year
  jan_first <- ifelse(is.na(x), NA, paste0(x, "-1-1"))
  as.Date(jan_first, "%Y-%m-%d")
}
# readstata13/R/tools.R0000644000176200001440000004762715002634727014100 0ustar liggesusers
#
# Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

# Wrapper Around iconv Calls for Code Readability
#
# @param x element to be converted
# @param encoding encoding to be used.
# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
read.encoding <- function(x, fromEncoding, encoding) {
  # bytes that cannot be converted are kept as escaped bytes
  iconv(x, from = fromEncoding, to = encoding, sub = "byte")
}

# Re-encode every element of x to `encoding`. Elements with a declared
# encoding are converted from it, all others use iconv's default source.
save.encoding <- function(x, encoding) {
  recode_one <- function(s) {
    ifelse(Encoding(s) == "unknown",
           iconv(s, to = encoding, sub = "byte"),
           iconv(s, from = Encoding(s), to = encoding, sub = "byte"))
  }
  sapply(x, recode_one)
}

# Function to check if directory exists
# @param x file path
dir.exists13 <- function(x) {
  file.exists(dirname(x))
}

# Construct File Path
#
# Expands a leading "~" and returns the path, or the string
# "File does not exist." if nothing is found at that location.
#
# @param path path to dta file
# @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
# @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
get.filepath <- function(path="") {
  starts_in_home <- substring(path, 1, 1) == "~"
  filepath <- if (starts_in_home) path.expand(path) else path

  if (file.exists(filepath)) {
    return(filepath)
  }
  "File does not exist."
}

#' Show Default Label Language
#'
#' Displays information about the defined label languages.
#'
#' @param dat \emph{data.frame.} Data.frame created by \code{read.dta13}.
#' @param print \emph{logical.} If \code{TRUE}, print available languages and
#' default language.
#' @return Returns a list with two components:
#' \describe{
#' \item{languages:}{Vector of label languages used in the dataset}
#' \item{default:}{Name of the actual default label language, otherwise NA}
#' }
#' @details Stata allows to define multiple label sets in different languages.
#' This function reports the available languages and the selected default
#' language.
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @export
get.lang <- function(dat, print = TRUE) {
  ex <- attr(dat, "expansion.fields")

  lang <- list()

  # The language list is stored space separated in the third element of the
  # "_lang_list" field. Only the first matching field is used.
  list_pos <- grep("_lang_list", ex)
  if (length(list_pos) > 0) {
    lang$languages <- strsplit(ex[[list_pos[1]]][3], " ")[[1]]
  } else {
    lang$languages <- NA
  }

  # The current default language is the third element of the "_lang_c" field
  default_pos <- grep("_lang_c", ex)
  lang$default <- if (length(default_pos) > 0) {
    ex[[default_pos[1]]][3]
  } else {
    NA
  }

  if (print) {
    cat("Available languages:\n ")
    cat(paste0(lang$languages, "\n"))
    cat("\nDefault language:\n")
    cat(paste0(" ", lang$default, "\n"))
    return(invisible(lang))
  }

  lang
}

#' Get Names of Stata Label Set
#'
#' Retrieves the Stata label set in the dataset for all or a vector of variable
#' names.
#'
#' @param dat \emph{data.frame.} Data.frame created by \code{read.dta13}.
#' @param var.name \emph{character vector.} Variable names. If \code{NULL}, get
#' names of all label sets.
#' @param lang \emph{character.} Label language. Default language defined by
#' \code{\link{get.lang}} is used if NA
#' @return Returns a named vector of label set names
#' @details Stata stores factor labels in variable independent labels sets. This
#' function retrieves the name of the label set for a variable.
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @export
get.label.name <- function(dat, var.name=NULL, lang=NA) {
  vnames <- names(dat)

  # isTRUE() guards against datasets without a default language, where the
  # comparison yields NA and would otherwise break the condition.
  if (is.na(lang) || isTRUE(lang == get.lang(dat, FALSE)$default)) {
    labelsets <- attr(dat, "val.labels")
    names(labelsets) <- vnames
  } else if (is.character(lang)) {
    ex <- attr(dat, "expansion.fields")

    # Without any "_lang_l_" field there are no language specific label sets.
    # length() also covers datasets without expansion fields.
    label_lang_hits <- unlist(lapply(ex, grep, pattern = "_lang_l_"))
    if (length(label_lang_hits) == 0) {
      return("")
    }

    # Fields of the requested language: element 1 is the variable name,
    # element 3 the name of the label set.
    lang_fields <- ex[grep(paste0("_lang_l_", lang), ex)]
    varname <- vapply(lang_fields, function(x) x[1], character(1))
    labelsets.tmp <- vapply(lang_fields, function(x) x[3], character(1))
    names(labelsets.tmp) <- varname

    # Variables without a label set in this language keep ""
    labelsets <- rep("", length(vnames))
    names(labelsets) <- vnames
    labelsets[varname] <- labelsets.tmp[varname]
  }

  if (is.null(var.name)) {
    return(labelsets)
  } else {
    return(labelsets[var.name])
  }
}

#' Get Origin Code Numbers for Factors
#'
#' Recreates the code numbers of a factor as stored in the Stata dataset.
#'
#' @param x \emph{factor.} Factor to obtain code for
#' @param label.table \emph{table.} Table with factor levels obtained by
#' \code{\link{get.label}}.
#' @return Returns an integer with original codes
#' @details While converting numeric variables into factors, the original code
#' numbers are lost. This function reconstructs the codes from the attribute
#' \code{label.table}.
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @examples
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"))
#' labname <- get.label.name(dat,"type")
#' labtab <- get.label(dat, labname)
#'
#' # comparison
#' get.origin.codes(dat$type, labtab)
#' as.integer(dat$type)
#' @export
get.origin.codes <- function(x, label.table) {
  # anything but a factor only triggers a message
  if (!is.factor(x)) {
    return(message("x is no factor."))
  }

  # look the level labels up in the named code vector
  level_labels <- as.character(x)
  as.integer(label.table[level_labels])
}

#' Get Stata Label Table for a Label Set
#'
#' Retrieve the value labels for a specific Stata label set.
#'
#' @param dat \emph{data.frame.} Data.frame created by \code{read.dta13}.
#' @param label.name \emph{character.} Name of the Stata label set
#' @return Returns a named vector of code numbers
#' @details This function returns the table of factor levels which represent
#' a Stata label set. The name of a label set for a variable can be obtained
#' by \code{\link{get.label.name}}.
#' @examples
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"))
#' labname <- get.label.name(dat,"type")
#' get.label(dat, labname)
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @export
get.label <- function(dat, label.name) {
  label_tables <- attr(dat, "label.table")
  selected <- label_tables[label.name]
  selected[[1]]
}

#' Get all Stata Label Sets for a Data.frame
#'
#' Retrieve the value labels for all variables.
#'
#' @param dat \emph{data.frame.} Data.frame created by \code{read.dta13}.
#' @return Returns a named list of label tables
#' @details This function returns the factor levels which represent
#' a Stata label set for all variables.
#' @examples
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"))
#' get.label.tables(dat)
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @importFrom stats setNames
#' @export
get.label.tables <- function(dat) {
  # named by variable so that the resulting list carries the variable names
  varnames <- setNames(names(dat), names(dat))
  lapply(varnames,
         function(varname) get.label(dat, get.label.name(dat, varname)))
}

#' Assign Stata Labels to a Variable
#'
#' Assign value labels from a Stata label set to a variable. If duplicated
#' labels are found, unique labels will be generated according the following
#' scheme: "label_(integer code)". Levels without labels will become <NA>.
#'
#' @param dat \emph{data.frame.} Data.frame created by \code{read.dta13}.
#' @param var.name \emph{character.} Name of the variable in the data.frame
#' @param lang \emph{character.} Label language. Default language defined by
#' \code{\link{get.lang}} is used if NA
#' @return Returns a labeled factor
#' @examples
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"),
#' convert.factors=FALSE)
#'
#' # compare vectors
#' set.label(dat, "type")
#' dat$type
#'
#' # German label
#' set.label(dat, "type", "de")
#' @export
set.label <- function(dat, var.name, lang=NA) {
  # factors are first translated back to their original codes
  if (is.factor(dat[,var.name])) {
    tmp <- get.origin.codes(dat[,var.name],
                            get.label(dat, get.label.name(dat, var.name)))
  } else {
    tmp <- dat[,var.name]
  }

  labtable <- get.label(dat, get.label.name(dat, var.name, lang))

  # check for duplicated labels
  labcount <- table(names(labtable))
  if (any(labcount > 1)) {
    warning(paste0("\n ",var.name, ":\n Duplicated factor levels detected -",
                   "generating unique labels.\n"))
    labdups <- names(labtable) %in% names(labcount[labcount > 1])
    # generate unique labels from assigned label and code number
    names(labtable)[labdups] <- paste0(names(labtable)[labdups], "_(",
                                       labtable[labdups], ")")
  }

  return(factor(tmp, levels=labtable, labels=names(labtable)))
}

#' Get and assign Stata Variable Labels
#'
#' Retrieve or set variable labels for a dataset.
#'
#' @name varlabel
#' @rdname varlabel
#' @param dat \emph{data.frame.} Data.frame created by \code{read.dta13}.
#' @param var.name \emph{character vector.} Variable names. If NULL, get label
#' for all variables.
#' @param lang \emph{character.} Label language. Default language defined by
#' \code{\link{get.lang}} is used if NA
#' @param value \emph{character vector.} Character vector of size ncol(data) with variable labels.
#' @return Returns a named vector of variable labels
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @aliases varlabel
#' @aliases 'varlabel<-'
#' @examples
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"),
#' convert.factors=FALSE)
#'
#' # display variable labels
#' varlabel(dat)
#'
#' # display german variable labels
#' varlabel(dat, lang="de")
#'
#' # display german variable label for brand
#' varlabel(dat, var.name = "brand", lang="de")
#'
#' # define new variable labels
#' varlabel(dat) <- letters[1:ncol(dat)]
#'
#' # display new variable labels
#' varlabel(dat)
NULL

#' @rdname varlabel
#' @export
varlabel <- function(dat, var.name=NULL, lang=NA) {
  vnames <- names(dat)

  # isTRUE() guards against datasets without a default language, where the
  # comparison yields NA and would otherwise break the condition.
  if (is.na(lang) || isTRUE(lang == get.lang(dat, FALSE)$default)) {
    varlabel <- attr(dat, "var.labels")
    names(varlabel) <- vnames
  } else if (is.character(lang)) {
    # Language specific labels are kept in "_lang_v_<lang>" expansion fields:
    # element 1 is the variable name, element 3 the label.
    ex <- attr(dat, "expansion.fields")
    lang_fields <- ex[grep(paste0("_lang_v_", lang), ex)]
    varname <- vapply(lang_fields, function(x) x[1], character(1))
    varlabel <- vapply(lang_fields, function(x) x[3], character(1))
    names(varlabel) <- varname
  }

  if (is.null(var.name)) {
    # order by data.frame columns and return
    return(varlabel[vnames])
  } else {
    return(varlabel[var.name])
  }
}

#' @rdname varlabel
#' @export
'varlabel<-' <- function(dat, value) {
  nlabs <- ncol(dat)
  # labels are only replaced if there is exactly one per column
  if (length(value)==nlabs) {
    attr(dat, "var.labels") <- value
  } else {
    warning(paste("Vector of new labels must have", nlabs, "entries."))
  }
  dat
}

#' Assign Stata Language Labels
#'
#' Changes default label language for a dataset.
#' Variables with generated labels (option generate.labels=TRUE) are kept unchanged.
#'
#' @param dat \emph{data.frame.} Data.frame created by \code{read.dta13}.
#' @param lang \emph{character.} Label language. Default language defined by
#' \code{\link{get.lang}} is used if NA
#' @param generate.factors \emph{logical.} If \code{TRUE}, missing factor levels
#' are generated.
#' @return Returns a data.frame with value labels in language "lang".
#' @examples
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"))
#' get.lang(dat)
#' varlabel(dat)
#'
#' # set German label
#' datDE <- set.lang(dat, "de")
#' get.lang(datDE)
#' varlabel(datDE)
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @importFrom stats na.omit
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @export
set.lang <- function(dat, lang=NA, generate.factors=FALSE) {
  # Nothing to do if no language is requested or it already is the default.
  # isTRUE() guards against datasets without a default language.
  if (is.na(lang) || isTRUE(lang == get.lang(dat, FALSE)$default)) {
    return(dat)
  } else if (is.character(lang)) {
    vnames <- names(dat)
    types <- attr(dat, "types")
    label <- attr(dat, "label.table")
    val.labels <- get.label.name(dat, NULL, lang)
    oldval.labels <- get.label.name(dat)
    oldval.labels <- oldval.labels[!is.na(oldval.labels)]
    oldval.labtab <- lapply(oldval.labels, function(x) get.label(dat, x))
    oldlang <- get.lang(dat, FALSE)$default

    cat("Replacing value labels. This might take some time...\n")
    pb <- txtProgressBar(min=1,max=length(val.labels)+1)

    # only variables with a label set in the new language are touched
    for (i in which(val.labels != "")) {
      labname <- val.labels[i]
      vartype <- types[i]
      labtable <- label[[labname]]
      varname <- names(val.labels)[i]

      # get old codes; for non-factor columns the stored values are the codes
      if (is.factor(dat[, varname])) {
        oldlabname <- oldval.labels[names(oldval.labels) == varname]
        oldlabtab <- oldval.labtab[[names(oldlabname)]]
        codes <- get.origin.codes(dat[,varname], oldlabtab)
      } else {
        codes <- dat[,varname]
      }
      varunique <- na.omit(unique(codes))

      if (labname %in% names(label) && is.factor(dat[,varname])) {
        # assign label if label set is complete
        if (all(varunique %in% labtable)) {
          dat[,varname] <- factor(codes, levels=labtable,
                                  labels=names(labtable))
        }
        # else generate labels from codes
      } else if (generate.factors) {
        names(varunique) <- as.character(varunique)
        gen.lab <- sort(c(varunique[!varunique %in% labtable], labtable))

        dat[,varname] <- factor(codes, levels=gen.lab,
                                labels=names(gen.lab))
      } else {
        warning(paste(vnames[i], "Missing factor labels - no labels assigned. Set option generate.factors=T to generate labels."))
      }
      setTxtProgressBar(pb, i)
    }
    close(pb)

    # Save old default labels to expansion.fields. This is necessary to save
    # original labels for further use.
    vnames <- names(oldval.labels)
    names(oldval.labels) <- NULL
    tmp <- list()
    for (i in seq_along(val.labels)) {
      tmp[[i]] <- c(vnames[i],paste0("_lang_l_",oldlang), oldval.labels[i])
    }
    attr(dat, "expansion.fields") <- c(attr(dat, "expansion.fields"),tmp)

    # variable label
    old.varlabel <- attr(dat, "var.labels")
    tmp <- list()
    for (i in seq_along(old.varlabel)) {
      tmp[[i]] <- c(vnames[i],paste0("_lang_v_", oldlang), old.varlabel[i])
    }
    attr(dat, "expansion.fields") <- c(attr(dat, "expansion.fields"),tmp)

    # look up the variable labels of the new language
    ex <- attr(dat, "expansion.fields")
    varname <- sapply(ex[grep(paste0("_lang_v_", lang), ex)], function(x) x[1])
    varlabel <- sapply(ex[grep(paste0("_lang_v_", lang), ex)], function(x) x[3])
    names(varlabel) <- varname
    varlabel.out <- as.character(varlabel[vnames])
    varlabel.out[is.na(varlabel.out)] <- ""
    attr(dat, "var.labels") <- varlabel.out

    # set new default lang and store string as default attributes
    # NOTE(review): this presumably requires an existing "_lang_c" field;
    # confirm what should happen for datasets without one.
    names(val.labels) <- NULL
    attr(dat, "val.labels") <- val.labels
    attr(dat, "expansion.fields")[[
      grep("_lang_c", attr(dat, "expansion.fields"))
      ]][3] <- lang

    return(dat)
  }
}

#' Check if numeric vector can be expressed as integer vector
#'
#' Compression can reduce numeric vectors as integers if the vector does only
#' contain integer type data.
#'
#' @param x vector of data frame
saveToExport <- function(x) {
  # infinite values cannot be stored as integer
  if (any(is.infinite(x))) {
    return(FALSE)
  }

  # values outside the integer range cannot be stored as integer
  out_of_range <- !is.na(x) &
    (x > .Machine$integer.max | x < -.Machine$integer.max)
  if (any(out_of_range)) {
    return(FALSE)
  }

  # everything else is fine if converting to integer changes nothing
  isTRUE(all.equal(x, as.integer(x)))
}

#' Check max char length of data.frame vectors
#'
#' Stata requires us to provide the maximum size of a charactervector as every
#' row is stored in a bit region of this size.
#'
#' Ex: If the max chars size is four, _ is no character in this vector:
#' 1. row: four
#' 3. row: one_
#' 4. row: ____
#'
#' If a character vector contains only missings or is empty, we will assign it a
#' value of one, since Stata otherwise cannot handle what we write.
#'
#' @param x vector of data frame
maxchar <- function(x) {
  # size in bytes, since this is what is written to the file
  lens <- nchar(x, type = "bytes")
  z <- if (length(lens) > 0) max(lens, na.rm = TRUE) else 0

  # Stata does not allow storing a string of size 0
  if (is.infinite(z) | (z == 0))
    z <- 1

  z
}

#' Read frames from Stata dtas files
#'
#' Stata 18 introduced framesets (file extension `.dtas`) that contain zipped `dta`
#' files. This helper functions imports those files and returns a list of data.frames.
#'
#' @param path path to .dtas file
#' @param select.frames character vector
#' @param read.dta13.options list of parameters used in \code{\link[readstata13]{read.dta13}}. The list must have the following structure: \code{list(framename = list(param = value))}
#' @return Returns a named list of data.frames.
#' @importFrom utils unzip
#' @export
#' @examples
#'
#' path <- system.file("extdata", "myproject2.dtas", package="readstata13")
#'
#' # read all frames in myproject2.dtas
#' read.dtas(path)
#'
#' # read selected frames
#' read.dtas(path, select.frames = c("persons", "counties"))
#'
#' # read only frame counties
#' read.dtas(path, select.frames = c("counties"))
#'
#' # read frames with different arguments
#' read.dtas(path,
#' read.dta13.options = list(counties = list(select.cols = "median_income"),
#' persons = list(select.cols = "income")))
#'
read.dtas <- function(path, select.frames = NULL, read.dta13.options = NULL) {
  tmp <- tempdir()

  fls <- utils::unzip(path, exdir = tmp)

  # data name, dta file name, dta version
  # (".frameinfo" is matched literally; the first two lines are skipped)
  frameinfo <- fls[grep(".frameinfo", fls, fixed = TRUE)]
  frames <- strsplit(readLines(frameinfo)[-c(1:2)], " ")
  frames <- as.data.frame(do.call("rbind", frames), stringsAsFactors = FALSE)

  # select frames
  if(!is.null(select.frames)) {
    frames <- frames[frames$V1 %in% select.frames, ]
  }

  # read dtas: build one argument list per frame. Every entry is created as a
  # list explicitly, so do.call() always receives a list and frames without
  # user supplied options keep their position.
  opts <- vector(mode = "list", length = length(frames$V1))
  names(opts) <- frames$V1

  for(f in frames$V1) {
    frame_opts <- list()
    if(is.list(read.dta13.options)) {
      frame_opts <- as.list(read.dta13.options[[f]])
    }

    frame_opts[["file"]] <- file.path(tmp,
                                      paste0(frames$V2[frames$V1 == f], ".dta"))
    opts[[f]] <- frame_opts
  }

  dtas <- lapply(opts, function(f) do.call(read.dta13, f))
  names(dtas) <- names(opts)

  return(dtas)
}

#' List frames in Stata dtas files
#'
#' Stata 18 introduced framesets (file extension `.dtas`) that contain zipped `dta`
#' files. This helper function lists the frames stored in such a file.
#'
#' @param path path to .dtas file
#' @return Returns a data.frame with frame names, internal filenames and dta file format version.
#' @export
#' @examples
#'
#' path <- system.file("extdata", "myproject2.dtas", package="readstata13")
#'
#' # print all frames in myproject2.dtas
#' get.frames(path)
#'
get.frames <- function(path) {
  tmp <- tempdir()

  # only the frame index is extracted, not the dta files themselves
  fls <- utils::unzip(path, exdir = tmp, files = ".frameinfo")

  # data name, dta file name, dta version
  # (".frameinfo" is matched literally; the first two lines are skipped)
  frameinfo <- fls[grep(".frameinfo", fls, fixed = TRUE)]
  frames <- strsplit(readLines(frameinfo)[-c(1:2)], " ")
  frames <- as.data.frame(do.call("rbind", frames), stringsAsFactors = FALSE)
  names(frames) <- c("name", "filename", "version")

  return(frames)
}
# readstata13/R/read.R0000644000176200001440000004514315002626101013625 0ustar liggesusers
#
# Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki
# Copyright (C) of 'convert.dates' and 'missing.types' Thomas Lumley
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

#' Read Stata Binary Files
#'
#' \code{read.dta13} reads a Stata dta-file and imports the data into a
#' data.frame.
#'
#' @param file \emph{character.} Path to the dta file you want to import.
#' @param convert.factors \emph{logical.} If \code{TRUE}, factors from Stata #' value labels are created. #' @param generate.factors \emph{logical.} If \code{TRUE} and convert.factors is #' TRUE, missing factor labels are created from integers. If duplicated labels #' are found, unique labels will be generated according the following scheme: #' "label_(integer code)". #' @param encoding \emph{character.} Strings can be converted from Windows-1252 #' or UTF-8 to system encoding. Options are "latin1" or "UTF-8" to specify #' target encoding explicitly. Since Stata 14 files are UTF-8 encoded and #' may contain strings which can't be displayed in the current locale. #' Set encoding=NULL to stop reencoding. #' @param fromEncoding \emph{character.} We expect strings to be encoded as #' "CP1252" for Stata Versions 13 and older. For dta files saved with Stata 14 #' or newer "UTF-8" is used. In some situation the used encoding can differ for #' Stata 14 files and must be manually set. #' @param convert.underscore \emph{logical.} If \code{TRUE}, "_" in variable #' names will be changed to "." #' @param missing.type \emph{logical.} Stata knows 27 different missing types: #' ., .a, .b, ..., .z. If \code{TRUE}, attribute \code{missing} will be #' created. #' @param replace.strl \emph{logical.} If \code{TRUE}, replace the reference to #' a strL string in the data.frame with the actual value. The strl attribute #' will be removed from the data.frame (see details). #' @param convert.dates \emph{logical.} If \code{TRUE}, Stata dates are #' converted. #' @param add.rownames \emph{logical.} If \code{TRUE}, the first column will be #' used as rownames. Variable will be dropped afterwards. #' @param nonint.factors \emph{logical.} If \code{TRUE}, factors labels #' will be assigned to variables of type float and double. #' @param select.rows \emph{integer.} Vector of one or two numbers. If single #' value rows from 1:val are selected. 
If two values of a range are selected #' the rows in range will be selected. #' @param select.cols \emph{character.} or \emph{numeric.} Vector of variables #' to select. Either variable names or position. #' @param strlexport \emph{logical.} Should strl content be exported as binary #' files? #' @param strlpath \emph{character.} Path for strl export. #' @param tz \emph{character.} time zone specification to be used for #' POSIXct values. ‘""’ is the current time zone, and ‘"GMT"’ is UTC #' (Universal Time, Coordinated). #' #' @details If the filename is a url, the file will be downloaded as a temporary #' file and read afterwards. #' #' Stata files are encoded in ansinew. Depending on your system's default #' encoding certain characters may appear wrong. Using a correct encoding may #' fix these. #' #' Variable names stored in the dta-file will be used in the resulting #' data.frame. Stata types char, byte, and int will become integer; float and #' double will become numerics. R only knows a single missing type, while Stata #' knows 27, so all Stata missings will become NA in R. If you need to keep #' track of Statas original missing types, you may use #' \code{missing.type=TRUE}. #' #' Stata dates are converted to R's Date class the same way foreign handles #' dates. #' #' Stata 13 introduced a new character type called strL. strLs are able to store #' strings up to 2 billion characters. While R is able to store #' strings of this size in a character vector, the printed representation of #' such vectors looks rather cluttered, so it's possible to save only a #' reference in the data.frame with option \code{replace.strl=FALSE}. #' #' In R, you may use rownames to store characters (see for instance #' \code{data(swiss)}). In Stata, this is not possible and rownames have to be #' stored as a variable. If you want to use rownames, set add.rownames to TRUE. #' Then the first variable of the dta-file will hold the rownames of the #' resulting data.frame. 
#' #' Reading dta-files of older and newer versions than 13 was introduced #' with version 0.8. #' #' Stata 18 introduced alias variables and frame files. Alias variables are #' currently ignored when reading the file and a warning is printed. Stata #' frame files (file extension `.dtas`) contain zipped `dta` files which can #' be imported with \code{\link{read.dtas}}. #' #' @return The function returns a data.frame with attributes. The attributes #' include #' \describe{ #' \item{datalabel:}{Dataset label} #' \item{time.stamp:}{Timestamp of file creation} #' \item{formats:}{Stata display formats. May be used with #' \code{\link{sprintf}}} #' \item{types:}{Stata data type (see Stata Corp 2014)} #' \item{val.labels:}{For each variable the name of the associated value #' labels in "label"} #' \item{var.labels:}{Variable labels} #' \item{version:}{dta file format version} #' \item{label.table:}{List of value labels.} #' \item{strl:}{Character vector with long strings for the new strl string #' variable type. The name of every element is the identifier.} #' \item{expansion.fields:}{list providing variable name, characteristic name #' and the contents of Stata characteristic field.} #' \item{missing:}{List of numeric vectors with Stata missing type for each #' variable.} #' \item{byteorder:}{Byteorder of the dta-file. LSF or MSF.} #' \item{orig.dim:}{Dimension recorded inside the dta-file.} #' } #' @note read.dta13 uses GPL 2 licensed code by Thomas Lumley and R-core members #' from foreign::read.dta(). #' @seealso \code{\link[foreign]{read.dta}} in package \code{foreign} and #' \code{memisc} for dta files from Stata #' versions < 13 and \code{read_dta} in package \code{haven} for Stata version #' >= 13. 
#' @references Stata Corp (2014): Description of .dta file format
#'  \url{https://www.stata.com/help.cgi?dta}
#' @examples
#' \dontrun{
#'   library(readstata13)
#'   r13 <- read.dta13("https://www.stata-press.com/data/r13/auto.dta")
#' }
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @useDynLib readstata13, .registration = TRUE
#' @importFrom utils download.file
#' @importFrom stats na.omit
#' @export
read.dta13 <- function(file, convert.factors = TRUE, generate.factors = FALSE,
                       encoding = "UTF-8", fromEncoding = NULL,
                       convert.underscore = FALSE, missing.type = FALSE,
                       convert.dates = TRUE, replace.strl = TRUE,
                       add.rownames = FALSE, nonint.factors = FALSE,
                       select.rows = NULL, select.cols = NULL,
                       strlexport = FALSE, strlpath = ".", tz = "GMT") {

  # Variable names are collected here during factor conversion so that a
  # single warning per category can be issued at the end of the function.
  collected_warnings <- list(misslab = NULL, floatfact = NULL,
                             dublifact = NULL)

  # Check if path is a url
  if (length(grep("^(http|ftp|https)://", file))) {
    tmp <- tempfile()
    download.file(file, tmp, quiet = TRUE, mode = "wb")
    filepath <- tmp
    # remove the downloaded copy when the function exits
    on.exit(unlink(filepath), add = TRUE)
  } else {
    # construct filepath and read file
    filepath <- get.filepath(file)
  }
  if (!file.exists(filepath))
    stop("File not found.")

  # some select.row checks
  if (!is.null(select.rows)) {
    # check that it is a numeric
    if (!is.numeric(select.rows)) {
      # invalid input: emit a message and return its (NULL) result
      return(message("select.rows must be of type numeric"))
    } else {
      # guard against negative values
      if (any(select.rows < 0))
        select.rows <- abs(select.rows)

      # check that length is not > 2
      if (length(select.rows) > 2)
        return(message("select.rows must be of length 1 or 2."))

      # if length 1 start at row 1
      if (length(select.rows) == 1)
        select.rows <- c(1, select.rows)
    }
    # reorder if 2 is bigger than 1
    if (select.rows[2] < select.rows[1])
      select.rows <- c(select.rows[2], select.rows[1])

    # make sure to start at index position 1 if select.rows[2] > 0
    if (select.rows[2] > 0 && select.rows[1] == 0)
      select.rows[1] <- 1
  } else {
    # set a value
    select.rows <- c(0, 0)
  }

  # NA placeholders are passed on when no column selection is requested
  select.cols_chr <- as.character(NA)
  select.cols_int <- as.integer(NA)

  # treat names and index differently
  if (!is.null(select.cols)) {
    if (is.character(select.cols))
      select.cols_chr <- select.cols

    # do we need factor too?
    if (is.numeric(select.cols) || is.integer(select.cols))
      select.cols_int <- select.cols
  }

  data <- stata_read(filepath, missing.type, select.rows,
                     select.cols_chr, select.cols_int,
                     strlexport, strlpath)

  version <- attr(data, "version")

  # Storage-type codes used below; these are the values for format >= 117
  sstr    <- 2045
  sstrl   <- 32768
  salias  <- 65525
  sdouble <- 65526
  sfloat  <- 65527
  slong   <- 65528
  sint    <- 65529
  sbyte   <- 65530

  # ... and these replace them for formats older than 117
  if (version < 117) {
    sstr    <- 244
    sstrl   <- 255
    sdouble <- 255
    sfloat  <- 254
    slong   <- 253
    sint    <- 252
    sbyte   <- 251
  }

  if (convert.underscore)
    names(data) <- gsub("_", ".", names(data))

  types <- attr(data, "types")
  val.labels <- attr(data, "val.labels")
  label <- attr(data, "label.table")

  if (missing.type) {
    # per numeric type: smallest missing code (min) and the step between
    # consecutive missing codes (inc); rows are indexed by `this.type` below
    stata.na <- data.frame(type = sdouble:sbyte,
                           min = c(101, 32741, 2147483621, 2 ^ 127, 2 ^ 1023),
                           inc = c(1, 1, 1, 2 ^ 115, 2 ^ 1011)
    )

    if (version >= 113L && version < 117L) {
      missings <- vector("list", length(data))
      names(missings) <- names(data)
      for (v in which(types > 250L)) {
        this.type <- types[v] - 250L
        nas <- is.na(data[[v]]) | data[[v]] >= stata.na$min[this.type]
        natype <- (data[[v]][nas] - stata.na$min[this.type]) /
          stata.na$inc[this.type]
        # values that were already NA carry no offset; record them as 0
        natype[is.na(natype)] <- 0L
        missings[[v]] <- rep(NA, NROW(data))
        missings[[v]][nas] <- natype
        data[[v]][nas] <- NA
      }
      attr(data, "missing") <- missings
    } else {
      if (version >= 117L) {
        missings <- vector("list", length(data))
        names(missings) <- names(data)
        for (v in which(types > 65525L)) {
          this.type <- 65531L - types[v]
          nas <- is.na(data[[v]]) | data[[v]] >= stata.na$min[this.type]
          natype <- (data[[v]][nas] - stata.na$min[this.type]) /
            stata.na$inc[this.type]
          # values that were already NA carry no offset; record them as 0
          natype[is.na(natype)] <- 0L
          missings[[v]] <- rep(NA, NROW(data))
          missings[[v]][nas] <- natype
          data[[v]][nas] <- NA
        }
        attr(data, "missing") <- missings
      } else
        warning("'missing.type' only applicable to version >= 8 files")
    }
  }

  var.labels <- attr(data, "var.labels")
  datalabel <- attr(data, "data.label")

  ## Encoding
  if (!is.null(encoding)) {

    # set from encoding by dta version
    if (is.null(fromEncoding)) {
      fromEncoding <- "CP1252"
      if (attr(data, "version") >= 118L)
        fromEncoding <- "UTF-8"
    }

    attr(data, "data.label") <- read.encoding(datalabel, fromEncoding,
                                              encoding)

    # varnames
    names(data) <- read.encoding(names(data), fromEncoding, encoding)

    # var.labels
    attr(data, "var.labels") <- read.encoding(var.labels, fromEncoding,
                                              encoding)

    # val.labels
    # NOTE(review): only the names receive the re-encoded text, the values
    # stay as read from the file -- confirm this is intended.
    names(val.labels) <- read.encoding(val.labels, fromEncoding, encoding)
    attr(data, "val.labels") <- val.labels

    # label
    names(label) <- read.encoding(names(label), fromEncoding, encoding)

    if (length(label) > 0) {
      for (i in seq_along(label)) {
        names(label[[i]]) <- read.encoding(names(label[[i]]), fromEncoding,
                                           encoding)
      }
      attr(data, "label.table") <- label
    }

    # recode character variables
    for (v in seq_len(ncol(data))[types <= sstr]) {
      data[, v] <- iconv(data[, v], from = fromEncoding, to = encoding,
                         sub = "byte")
    }

    # expansion.field
    efi <- attr(data, "expansion.fields")
    if (length(efi) > 0) {
      efiChar <- unlist(lapply(efi, is.character))
      for (i in seq_along(efi)[efiChar]) {
        efi[[i]] <- read.encoding(efi[[i]], fromEncoding, encoding)
      }
      attr(data, "expansion.fields") <- efi
    }

    if (version >= 117L) {
      #strl
      strl <- attr(data, "strl")
      if (length(strl) > 0) {
        for (i in seq_along(strl)) {
          strl[[i]] <- read.encoding(strl[[i]], fromEncoding, encoding)
        }
        attr(data, "strl") <- strl
      }
    }
  }

  if (replace.strl && version >= 117L) {
    # the all-zero identifier maps to the empty string
    strl <- c("")
    names(strl) <- "00000000000000000000"
    strl <- c(strl, attr(data, "strl"))
    # swap each strL reference in the data for the string it names
    for (j in seq_len(ncol(data))[types == sstrl]) {
      data[, j] <- strl[data[, j]]
    }
    # if strls are in data.frame remove attribute strl
    attr(data, "strl") <- NULL
  }

  if (convert.dates) {
    ff <- attr(data, "formats")
    ## dates <- grep("%-*d", ff)
    ## Stata 12 introduced 'business dates'
    ## 'Formats beginning with %t or %-t are Stata's date and time formats.'
    ## but it seems some are earlier.
    ## The dta_115 description suggests this is too inclusive:
    ## 'Stata has an old *%d* format notation and some datasets
    ##  still have them. Format *%d*... is equivalent to modern
    ##  format *%td*... and *%-d*... is equivalent to *%-td*...'

    dates <- grep("^%(-|)(d|td)", ff)
    ## avoid as.Date in case strptime is messed up
    base <- structure(-3653L, class = "Date") # Stata dates are integer vars
    for (v in dates) data[[v]] <- structure(base + data[[v]], class = "Date")

    for (v in grep("%tc", ff)) data[[v]] <- convert_dt_c(data[[v]], tz)
    for (v in grep("%tC", ff)) data[[v]] <- convert_dt_C(data[[v]], tz)
    for (v in grep("%tm", ff)) data[[v]] <- convert_dt_m(data[[v]])
    for (v in grep("%tq", ff)) data[[v]] <- convert_dt_q(data[[v]])
    for (v in grep("%ty", ff)) data[[v]] <- convert_dt_y(data[[v]])
  }

  if (convert.factors) {
    vnames <- names(data)
    for (i in seq_along(val.labels)) {
      labname <- val.labels[i]
      vartype <- types[i]
      labtable <- label[[labname]]
      #don't convert columns of type double or float to factor
      if (labname %in% names(label)) {
        if (vartype == sdouble || vartype == sfloat) {
          if (!nonint.factors) {
            # collect variables which need a warning
            collected_warnings[["floatfact"]] <-
              c(collected_warnings[["floatfact"]], vnames[i])
            next
          }
        }
        # get unique values / omit NA
        varunique <- unique(as.character(na.omit(data[, i])))

        #check for duplicated labels
        labcount <- table(names(labtable))
        if (any(labcount > 1)) {

          # collect variables which need a warning
          collected_warnings[["dublifact"]] <-
            c(collected_warnings[["dublifact"]], vnames[i])

          labdups <- names(labtable) %in% names(labcount[labcount > 1])
          # generate unique labels from assigned label and code number
          names(labtable)[labdups] <- paste0(names(labtable)[labdups], "_(",
                                             labtable[labdups], ")")
        }

        # assign label if label set is complete
        if (all(varunique %in% labtable)) {
          data[, i] <- factor(data[, i], levels = labtable,
                              labels = names(labtable))
          # else generate labels from codes
        } else if (generate.factors) {
          names(varunique) <- varunique
          gen.lab <- sort(c(varunique[!varunique %in% labtable], labtable))

          data[, i] <- factor(data[, i], levels = gen.lab,
                              labels = names(gen.lab))

          # add generated labels to label.table
          gen.lab.name <- paste0("gen_", vnames[i])
          attr(data, "label.table")[[gen.lab.name]] <- gen.lab
          attr(data, "val.labels")[i] <- gen.lab.name

        } else {
          # collect variables which need a warning; append to the existing
          # "misslab" entry so that every affected variable is reported
          # (reading a misspelled key here kept only the last variable)
          collected_warnings[["misslab"]] <-
            c(collected_warnings[["misslab"]], vnames[i])
        }
      }
    }
  }

  if (add.rownames) {
    # the first column becomes the row names and is dropped from the data
    rownames(data) <- data[[1]]
    data[[1]] <- NULL
  }

  ## issue warnings
  #dublifact
  if (length(collected_warnings[["dublifact"]]) > 0) {
    dublifactvars <- paste(collected_warnings[["dublifact"]], collapse = ", ")

    warning(paste0("\n Duplicated factor levels for variables\n\n",
                   paste(strwrap(dublifactvars,
                                 width = 0.6 * getOption("width"),
                                 prefix = " "), collapse = "\n"),
                   "\n\n Unique labels for these variables have been generated.\n"))
  }

  # floatfact
  if (length(collected_warnings[["floatfact"]]) > 0) {
    floatfactvars <- paste(collected_warnings[["floatfact"]], collapse = ", ")

    warning(paste0("\n Factor codes of type double or float detected in variables\n\n",
                   paste(strwrap(floatfactvars,
                                 width = 0.6 * getOption("width"),
                                 prefix = " "), collapse = "\n"),
                   "\n\n No labels have been assigned.",
                   "\n Set option 'nonint.factors = TRUE' to assign labels anyway.\n"))
  }

  # misslab
  if (length(collected_warnings[["misslab"]]) > 0) {
    misslabvars <- paste(collected_warnings[["misslab"]], collapse = ", ")

    warning(paste0("\n Missing factor labels for variables\n\n",
                   paste(strwrap(misslabvars,
                                 width = 0.6 * getOption("width"),
                                 prefix = " "), collapse = "\n"),
                   "\n\n No labels have been assigned.",
                   "\n Set option 'generate.factors=TRUE' to generate labels."))
  }

  # return data.frame
  return(data)
}
readstata13/R/readstata13.R0000644000176200001440000000106115002626101015015 0ustar liggesusers#' Import Stata
Data Files #' #' Function to read the Stata file format into a data.frame. #' #' #' @author Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' #' @name readstata13 #' @aliases readstata13-package #' @useDynLib readstata13, .registration = TRUE #' @import Rcpp #' @note If you catch a bug, please do not sue us, we do not have any money. #' @seealso \code{\link[foreign]{read.dta}} and \code{memisc} for dta files from #' Stata Versions < 13 "_PACKAGE" readstata13/NEWS0000644000176200001440000000755315002657571013107 0ustar liggesusers[0.11.0] - Initial support for Stata 18. Import .dtas files (Stata framesets) via `read.dtas()`. Alias variables are currently ignored with a warning. - The `select.cols` argument accepts either variable names or column indices. - Fix compilation on musl and other non-glibc based systems. - Add package alias to readstata13.Rd [0.10.1] - Fix writing `NA` and `NA_character_` values - Fix writing of STRLs on big endian systems [0.10.0] - Fix sortlist attribute for dta format 119 - Fix compress option. In the past, unwanted conversions to integer type could occur. 
- Fix encoding issues in variable and data labels - Fix for reading/writing of format 119 - Fix build on FreeBSD - New feature: improved handling of time and date formats - New feature: collect warnings from read.dta13 [0.9.2] - Fix build on OSX [0.9.1] - Allow reading only pre-selected variables - Experimental support for format 119 - Improve partial reading - Export of binary data from dta-files - New function get.label.tables() to show all Stata label sets - Fix check for duplicate labels - Fixes in set.lang [0.9.0] - Generate unique factor labels to prevent errors in factor definition - Check interrupt for long read - Fix storage size of character vectors in save.dta13 - Fix saving characters containing missings - Implement partial reading of dta-files - Fix an integer bug with saving data.frames of length requiring uint64_t [0.8.5] - Fix errors on big-endian systems [0.8.4] - Fix valgrind errors. converting from dta.write to writestr - Fix for empty data label - Make replace.strl default [0.8.3] - Restrict length of varnames to 32 chars for compatibility with Stata 14 - Stop compression of doubles as floats. Now test if compression of doubles as integer types is possible. - Add many function tests [0.8.2] - Save NA values in character vector as empty string - Convert.underscore=T will convert all non-literal characters to underscores - Fix saving of Dates - Save with convert.factors by default - Test for NaN and inf values while writing missing values and replace with NA - Remove message about saving factors [0.8.1] - Convert non-integer variables to factors (nonint.factors=T) - Working with strL variables is now a lot faster (thanks to Magnus Thor Torfason) - Fix handling of large datasets - Some code cleanups [0.8] - Implement reading all versions prior to 13. - Clean up code. - Fix a crash when varlabels do not match ncols. - Update leap seconds R code with foreign.
[0.7.1] - Fix saving of files > 2GB [0.7] - read and write Stata 14 files (ver 118) - Fix save for variables without non-missing values - Read strings from different file encodings - Code cleanups [0.6.1] - Fix heap overflow [0.6] - Various fixes - Reading stbcal-files [0.5-3] - Write dta-files - Read/write LSF and MSF files - Source testing and cleaning - Support for multiple label languages (see http://www.stata.com/manuals13/dlabellanguage.pdf) - Additional tools for label handling [0.4] - Convert.dates from foreign::read.dta() - Handle different NA values - Convert strings to system encoding - Some checks on label assignment [0.3] - Reading file from url. Example: `read.dta13("http://www.stata-press.com/data/r13/auto.dta")` - Convert.underscore from foreign::read.dta(): converts _ to . - Missing.type parts from foreign::read.dta(). If TRUE return "missing" - New replace.strl argument to replace the reference to a STRL string in the data.frame with the actual value [0.2] - Read stata characteristics and save them in extension.table attribute - More robust handling of factor labels - Set file encoding for all strings and convert them to system encoding - Fixed compiler warnings [0.1] - Reading data files and create a data.frame - Assign variable names - Read the new strL strings and save them as attribute - Convert stata label to factors and save them as attribute - Read some meta data (timestamp, dataset label, formats,...) readstata13/vignettes/0000755000176200001440000000000015002660533014376 5ustar liggesusersreadstata13/vignettes/stata_strl.dta0000644000176200001440000013230415002626101017244 0ustar liggesusers
118LSF11 May 2018 12:25
%f e ))G*4Ĵplatformarchossystemstatusmajorminoryearmonthdaysvn.revlanguageversion.stringnicknamenotesimage%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9.0g%9s%9splatformarchossystemstatusmajorminoryearmonthdaysvn.revlanguageversion.stringnicknameaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPaa<e*!Ye*Q0R0 Y<@ Ye*!YP;Ce*P;CJQe*yKQe* e*e* CPGSOR is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To download R, please choose your preferred CRAN mirror. If you have questions about R like how to download and install the software, or what the license terms are, please read our answers to frequently asked questions before you send an email. GSO)PNG  IHDR'BsRGB pHYs  iTXtXML:com.adobe.xmp Adobe ImageReady 1 ).=@IDATx}Un MAT Xօu)HBU(J \R]Q"eյ"B`%BQJ斯yrwKH s7s{y9eΜIS@eIiťK3g.`{JFCfYX@j6juUZZ$*ͤQ)K%44FY.FRit 1G̙CZ_}GE>ffVna%JFD!.\XHiHSE]T9ϙ,m$-4^eRJ7OZÌYNKЛI,!^C W(^mYm4icYIt'YmG!8ңf}g5x'< xI2c؆B?/k3 !X gnMd -i[5ʥd$M_}>g &jI\NJl`堁F? 
>!dBb8?QQ85Pz#iZpl%iGvkD=OIozӮUB<8x}eݒMf#~b,(ֶ`ܹsy.K,=ж0@ LzJS<ڭVҲSS.\,5gaPq K.26Fg{=v+ߟ$1լYz<#ҿߤ4JGJpVR85]ZZjJn nqxkm`Qay '5wI<'dpDH",LaqIA bpN*'iVnv 4#-a-׻~>w^+$Y;Ӳʏ 3:X_ӫ=WllV^V24pp 3ڲe[1SgD V crCIJ]5=S v2 :%`U`5U>dH{fAT=CXiea/Mxap&fvjYr˯6˲p4^?nѬXZ _Qve@I<ĭJ g2CHä@4i q`Q5eX/ЁJ+h2D)C=epK5,2ژRZUizR= қ@㬕w[sϭE/ri7+ yĩ؝wޙSr?H*])Q{c#kpDKkF@3TQY!Ng'mwiL4KjW r`-Wf U0@{E>yĸ&Y8n)\牄.v#hR\ c\NJPDh*_|4zbyAObF<xQ!Q@2T%c1]]JKGFzo|{L{3Q. xN[oZo@Cڽws>{S`~i& opϤjNQ2砵Ec*T"p -5:y&PgrkT:QG3|vsEKlZ;nMc/8J5`L"WL9'F8g \!lhtK0:3Ta(Cc9}ހ4NL8¦LT|A?䐃`*ښN Bͫq:`GYzӏe8|#"E2j1VK!89+6kT5$BI"7Gpol4)q0S;=,TNZeJXrBL!w"MlAKK.S0\R2/"eIUAWSm6<=`:vWg]AdLz/%{zhȻxy1P|ȇ" hY¥Eg,kkJZF !ZSYcPHO,VXp aD"erLaa2Θ*^7/|͢fY, !pۋٮC#?=zCu)ǃӷ,k,m.&`5tꖭFB| %v!orxA80Vc7O%$0}cw ~ʤ0C4ΰPc RZyX/Q4z_A7Yݢw[o$Ɯ_^zZNP<_0-H`! ù^C*7"GiPBi^H'o93Sq4zFCb}!DcNӕOF$Y(}49Kt tM1ә"IEpUBφ?!5`,1Xa_K[]1Gr2v@gz89p;_=OgzH>/\ )b,Yx\1S j\rht4D7#ҍa|DoƂ{ bܰ3MySAh1I2T̘8],a>n0\z)d銇hPMM!"E2@YӋi鮒&H$NwKr# 86[PZ >/2\t]ϩ_2=VX,rA@,-;iIR XhlkZs`T mHpl񠑪hZ&K4˟c5.OOӢ9l`WVw:4@ՀZS.śX3R*Ƃ":`wl gN +GcE!iq))?QdQ$7U*:G*لAa*r@a(gu[x:20=|Jzڀ+bS -y))#3.P e"0!х;y9HIp1'|@Īo|'ZKEd 5 W `)Xw+*. 5L28ZacNgӊs0,*p ydd|C;r[/%=-իV6g?[neFR,kշ-o A]O.ja$wh%xht`I7r Vgy Z ž+*r.Ƨ;%"?#vvǞ(ni%O"P3e 댓x q)|IB,."S'8 K1toXen#Dr㠃#$@GnktM{ Dk> ;ŎY${y5$5 hEW^ []o˱tR){1^/S wBX. H҈Zc$4ly! *a*xu XR ӏ:}'::J= -]{c1ޞ^.M rF% X[|0" Qa4$icʍ-bR9+`vBnGrsZ/Kxq~~Ј}OPֹbNįG7ߎ59ҤiKif!3_/HYG_2~"K`Ԓi%ۆ c,ʩR|ɾ Id4;Wzb?JTz/`)H&I5($Z tNĭIV jF]*'7;g,}P%Kl%' s]~ /MŮӷx%vz}51ApFu7VL_`u0VV'~Xaᇳ2,lṀ]طsczWx]w}d"k}sE!~桟F. 0_ "~Gr8VLRq5!,PkxGBk1 ?AH7.;{?'$! Epø3fVQwΠª]p;8W\w! 
*ΜI;!:5u-I >xDpnTk 7qvvմMiʕ:(6Zǵ$4ASd59Tx Dž!pxXxxw9к iG˽Y&I+ٵAkrymZwG ~XmэZN{7anܟXuq]uA@NB*$rkfqb{^߁uruf3&6yˊ+hd?9{*] 0eUy(8d:M\Ѯ[ԭ4\7'?{yY.6N`*ӥ9oOOma<IJ{8MY,p>#sȭ^ E $(GWA-z0Ρ$d> <8  x/uGdM $y,+!@ $_kq"9oȺ+[ QR\ڨ>[j(SwE+TW"G\X^;\| +h`_5|7a!Qt_Ca!\E<83DȚcPGG~F%{.z`9H^p ܯ(,d0|>%>*DgXn9 vrb;wԻx80΃5$ۚz,gz ,xDTyn,r=\"i3KicL8pz=x)o,O m#ܴ'/sLʣy(&/$C*D:0Q4!]%I6-)Ch7F폺:å<ӅMšIJʋpGxǴpv1+_9?80|>IIspo'ݥLvh/7%)J`F CNӇ8Ɍ14 7F:OƷ[+ h[ٮp#aK[K//u+˚׿諏zLZ6Ji`+G}77%`BV083*.g&?ŀ$k=\B ]niKH;ʆ$XHQaI0*3J*p0V`49lT C$[\tJ>ߟƵ(nC|:Kץgb/z[+"U"i 0:QSNU6|8.]fλ+<49186%䎝??眯Nr9<{lt- [phCōbÕKS.k9Ӧ=@\$Z )ww0ʵ$˨ B)_Ⱥģ9{ҡUSNZ X4< μ &| FQ5j DcB("( ,p H'.ւvޮ7´K՞Ϊ4n5s)1D7nH- Wa8^eKK8!3 9 8V;$9<Sƭ80Mcz7_[8-`C,Bq E$@7gPN2)[!c߅;ʨY}jBZzuH~t"sp h qX"O14^dHY7 t0i^ͻ}H24@/*}xK9ޅ 8SwXOjХ+ /۷\.팇( DdYe;:h$q(C/=Fz6ak9Xl98@8is0Ψ^P&v&Z"6cd4>s8G5dK&bqJS I&?+j|w1k=IZOw A bn8G_Y>4QH)*zfl jaOgz)9@ct襬&:șZ&B 1^d^.kpLpo; :B^HF!D@8 6XD`|b) Zf1H"Hj5B),YY/U^AkFO$Fs dYar֞nF{\]&{~_y)\X. rR>v/ FS\F9%M,Z`@~Oo}C 0b!*^ )!⋂ eO/$9\;-UN.B Y-%e9z0V8G 8 w `M )&?4fp ! 40"S4! `APF 'kEU_fg<ȵ|"գ*Ɯ&\Z+DA0,/F8"NEs WCPNxG:ojj3R=d!'\އuyRM6A Q,EaO-mK7 ٹᴻ1R_$-v9c6J)g2|8VA1e9 XP]rşѣe! yBG+p#="?,embC<ݸfMO ]n vI0q4Y0\|eG01 [*vMc*F0&WQ|#<8sahnI]h{E$/Pf;e_M%B ML[)3kʹ&"W >b@ ;WIƝ 1ܞ@͟0l4'7RH3KRx!m Fx"v\GsͩPpjaENPwc=([e Ptjه;>#9pT)}2D#=LE+uҜEL h6s PsshHPDԔ^ Iw3zSLh9IzYmId8T0&^/am$5-oGr"9x1C!y61WԠD/AzH(ف.dH5O^"^C*g1"[9Ę2$2#Fh-~"=$S;-FK `P^FR}^iZܵU"\0vh-ńf- vcdLZh9j\Pn_kVpY b:Diؚdq,u!GeDD9hDE Cpj_HđyH͋F`PWh{9.?"$srŪH:q7DU.C9N`#i d_= E cg6D^ dk[8N%-77V}_~F<lG|(.׀RiBXi$FX`*ӜEBƀ2?Z +&' 6NY 1-:йy]@cpTe:4͝k>"lW|1lHfI꽪).! 
)2"q*~!QUqWNh{FP;x Uj7U@TZ!Pju1yq\̆hD KDa4ʌTYc?ڳrCBDM.ٱ'Ƌdweh2hOj,nz(s [7R{v둑l3Lm[ðA1%_c8]%c},#zC ,6 ZLmn%;F-?y}#U7 WnLV}aفsp5 FQPc 4;H|ȫ:KH5e;<!͂WY/A uz!LsI+•Y#sM IimYg λ!mHù^Cn`:e|lC|3j:Z`3Wh_T!rU8Ƿ𭹿bϘ^u<ѡQ䡃 Uj$ C*rWCB(PWwpgmcwl~cdF n1v9P|tdrA ?Yp\}]}/A7nԹE`b!I@vl~]K K8" `49>["8w%ӄؔ}}ҽdjA(OYyQC6G)fs7Rȯ&iLJ/%i5"VM7[iWSX]2م̋*=@mΡ)O>pH1EHnݨ Gxώ;9u'|66В([Ae R%niSqIvfnlp_q9!.OV^g1Z9MB4tHD#MBp7>6 trH<|idyR {}[7r[mӮ& Akb:-j{W!(`RQibqEMǀr1ܜ:MTTo5vPILFEiKǓ$'e ݘ)IADؒ` ~Z[$O0~ʳ,ظtq$93@N>B|8˷ۿ/O+k$:Z8 ëdH:N Til<[)v˱>Lshg/P]'g 1yDL7b69) Bpnl51xN"ёsl!+jMBN:TQ]\-@BS8EH{6zI.:}_q wCZׅFIUj91<ƫ׼Tݭ5ip0pV.AC4")aI2 X00 L:!0Zp2<βa65+Kq!uE,Dqd1s 7z5@ ()jʞg6R?ӎT7 J 9G)ddMZI/lH9 l)Ic 0dX />\K\%t y6:^`z/([GmCR2$V;T=PLk-O+$t~@b$=eˠ]b&)YE捙f8XnW$Cc1 P1EB9 X6{h2z=R+$6̂iez8L nfs)D,I.$ ~hMEx0C._D B&5ģ>V# iS8xS$8R;ʦ"x/E-4mdgfK&zcT J`a51CzηFJw 0RXA8ci!hhI⫬$?H aJp#pp,YYiЃQ%ht%ÊGEB\$I(`e(Cɀgxr.s/U$F|h^'UPjԒK]UtDҍ#J0   R^u {SH5$pQzVHy&4n h+,!D ; U,!A2"YIH%iɪc Rq- qdbtA<GV}<+93'*)vZI3LqJQgWGJN!'Mt#-97Ix+Y8SwZqNlIH8rz9!~mB@F\We3I:J60Y\; n6@\ Te_nbiozTv#6gxl*S6w9nv@wȈY A2nS)lā\xPL^TQ3phd`j_# bGK/X j@b BXwcG}~z-i9[=sH. ^ ̚+N܁Җ c8Ck"I •w 0J=H{)7y;<1=؜ח4xp0 B)$HHgjs9s2= yX`->)m 2t+ubI:N+i^uٸ:KoDQN0o($ ރD2gHdY&v7<.7s䙃ʡH]Piw?9fH7>T< u6 -Be8=e4D1_r Y,t$`<`sXgC]Ay ,`/Q,%y)ZW҇E_1G.O#3NҦH{[|qbA=t N.Ffc)@f<άqS$8P^a,d!p@y+ͬtNA3.}fC,,ɧd' 4z@>EtM)bȽJ5Yw- #-n- \X7:H_,Y /3R z$ؽ%E1~Ư*!~h *8@(t\NT rB'6k.$ўg~4ʵ~| Aha\9b#\ frBcfEB*>%| T5urxO`p)|N?f'y ߄GW𮍃e޼:W琻 TFNj zᜮC,$Px~у~Iepv3`egyfO_Z (F/MxG23 ܠ2oNͣJ׽u,?Ta R[;M#c A!P^(W^w{trclsE Y"DЀ^KguΈnÂ'ppM66I]< f8n *reS5> hK$K;j#ͺ2[1,r9tic,|cGkr_}w94Ѹd50:$jݭ䓗۰C,aoJڞwu1/PjEW>q`0 A:OETg޼P^ykH1bޱ6iBX@gGs, CĠ¶<XCْGzy?\QGp6:CCcyqoKE/+(i$Mt q\9s!xxn8?빽YQ}Q5Ip,aXwcxЏOfQQaѢ3;$w hD+눢yQiiAP 7 5J)~(/u^WSPЄ3C7 /$4.dY4˚;~XS Q$GM+_T#_s mww~w&X>2e w<ǽé8 RBNxwu(/kDjHJB< T1<+gD'ȋB iZ.0R<*E=}gḧd&\%|1Zqѳ0`rcW~.ֽÍGqeV8ߩU@ƹ#<(84B'SjiDȺNeSY5KysWH hn0C#;o挂5%|p6" F1-;[y8 ʀPY Z qW%j^z?>aVM$f! 
B)@m0\^`rG(=nA`PDrI*)ƥT98GΡH4K]h}-p[8vqig͆^b8E!PDtz!XyԿk% M[|9sv)cSl&V;#Ӊ tRx$aQOy$`TFF}hv63i8Cd<8m,a1\ӋdR¥B<\n;iA0v!cB%>JS#u<$w@2>܃8ŝq4ݴ"@'fwOO 뮮tӾ"fi G4ku}K*\PbÝwɚ8"zO (im3Q!X,,Lp K;xvۍoDJ➒CSc* ] `@qV]PTBr;xDH %_,pK9A"|8Y>8;ȮL4eYGkݯvy$`ji`j/LVkr+RPо[B'}}Q[AaŷZz{Ә").ͧy97IZ S$W [r* Ft똩x:_|e{pS `(9Tf^xP,v>^CZ)I18օb:(eLO \ї5tpT$M3,eWm y·|yXw$|j>ţ Jaf{:fek#Ï`s?1yLڂF! ]bůzkO@pHh Ĭr.⤁ ƝZ!鴛0Cd3xNv(Prez&dNҽK~sSwoA,  SR6Jrla%`srMz a)/+ӁS, M:t;r!5tM^L"(#6(ol5JZ}_lx7-^Gnl9(k/t;2: L2[a1kĆ0?A^&dǔB& Ċ|&P@=hH xbdQDRqDs*/n: [`~]z"ic"nK 15!2!A*UdQ\("(/ rD8/W:cѡc hwt/l͈#Ns÷+GgĖF.wO?댮Z<8]3%ZPX\sɟ:O-c\$T7e>Xn2m E)( KUF\.b$J88=μ# pQ%OIr 5rWuI)u2pڜZ`I>D(Wgv"# vǮ옡j܆dEeٞ]kAs,S-u*}ސ>cjXƩ>O&?[A)4,<( {qUW_\$Vk ߫K Gz&%V N" ,9a8ڹVN880E7+}} >:^rX=:L$FxUbu:cp r aviz"/x:\ Ol R"nʵ2vXr1?Y93A8\c1<<3%9y9kq_Er)œ)^ZS(ޅٷ'N}}}Ě-*\&xJ`HubA\v +;]B%!!4/ BWF1s00\ΌZB"D+ Qy/>Ywڻ33橁3xPK gDKK@#Ë@8" ^F_ ^[ _/ GPmDg'_ zK%gq֝"cWO`]vuw388aapȯ{'?y҃yZCK~ d9ڟuB?dʕh.s4PMbO9f !4^`_Bc&/"3k)O^.P"Bl~bǤAD5fF*ȩ=k-rXPL>.R2Ɵ˵\ .fj-.vtX{se; g2;G|lUkG!Jo ýꩡ@MKֵpZ._*\: eNBb1RpHx Yc8w. )_"Ȭr)H0V>;V.NKWJ3mOܓ?D&iO9b̻:@)ՕɽˊR_=/=@l\>O77vc=,lTiDY4m䉷CF'|Jڮq.~~o_C#sfas\ ﹵h>4޾@~1,H !$#4~)5N'6Ⱦ|;9UQSdp3cW|*ŞDMδ u )zKB?ld ;#o|XFHU#s<*֒abYD2`F:HwI}Ī_sAN9t0p\Yk uXa)ZK2;-g"}\)N-1ers\!ܙ! Z xFGlނgv}@]Ftgj'P/okpI:S*P!L&)e<%v}8u U[kV9AxDe0]֪ *E|C޵(א)W-/L y-bTHZxAi)HcVRW_|tu%uox޲^~pO[,gj&!oTX‡5~. 
z\0XAuM'vD$kֻ$W\?0"BEKL.txKb,h# Lř@(FF(ù^<+6G*T4+a!^ƲenX/~W6v#>)zl ϭom8&bF/yIm(y77Nm*:(Ej|:Dqĉ}};\/!:e]< ޾7lb/Q$\,G)S 75shy 9W:0Q aF7!!mWrTY(/;Y|yo- y)b%rMSłpѓT3 +(A?RBjDiIpA#-YxүM>T5aX5h TCG5|%fߨ&%z (Q@48&`xIp-Q6˥֚Hy6R24o/<.Ox)حҖ}BO x'/jIݿwڧasIlS L` Rsf-dI- /HQBOCb ]8NXLKW$:DYckx5P7y5fS}衮_au-]}n 1}AjyP9'l()FW_}.kl=ͫYR 뙐8 "ed Zr w0eԍo{ [ɴhh~ Lbj2:2VO}c3s15ƂO\}q>k]U4B.Bh{n(K"^be!p8 aر_h%l#P87/qb9\]f|nrc s }`?Q ';D}5m's*+Y_pڂ@E5}wlY}0IVh ]4M=3J x1ͨ܍6 D*" w y^i$aK@wkeB߼T.%#l5 vv&-'q I$ă֩X{|()G:.7lhdvC"RNʘQ0x÷~qm{°$`NLDN$"TI17;.bFj1rf~xkVѶg2a48%.gQloUFk\QS?8<+QOkl Z@ WB#N 2Ek!Ξ< w8$3MWo" W^!+8IGH\֤A+ /o=8(0R#\Y϶笳<YZ?5\Д'ccADcPcĐpoo௜rŦ?™'l9|܀\n$nM.T)(#z{{K3mnOΗ92T%1%\0("aHED.)h84Bnl6tn~3t_jY6L/fO4OL)c޺XL5yo4~- ;XyIy1ך0LR]Dl4M%MINwk$N(,{DݭXFkӴRWOك>ǏosUoa `~L^ zoB Y˛ IY/S^/Q^'"~Ou| M,]>Xq=}r6xltKon DzN^3ch+|% ʣ&\BKXAH8)5F)MAtA  %JOWq8aEu%f7_Gj6 _Sr.XXnHb~p,$yzS 3-x8n;٪?ml]BLubˆ4g 'k9Gpb1{x^Xۜgvj-8ؤLyu;:ʕIj(XSXeӲ[>Y(P!JpBzH :PJ,):9q4"rV ,"`ޕU\{bg#a3V (hR(JA"BZ%"%jeS jAb-QITos̛{؎cyޙ9sΙ̹ܙeCTD*KC3ӳ X^,S삮V\a/_3ج )]G&i֡'?a\5@Mz3B`7G(ŌT>.L<#d2C s Q- s܄a+no [` [mEkqh\݇XXƼjJ)':nW-(9fCU<ԯXZ9I-Hj  HV'Ù>:&ՍjN:>҅Z?!%rP ~ )7#AHE^IO\I<7(@1O.lkkŘtt fA7?Tw,&Mjƭy`/;:w/-qgf+TR *cB< oRL'@ϿReJ 6K{r)|IJ ES- gʰŗb?c3W/Aaaf Oȁo)92슱IS**!S!$ ,RBpZVNVq4DskFG.H$y!sz =opڒ m9$%Q^k=6~f>Z7aTKey_Y!i$ (G|: Ji7 тSK+1i?qV CEf 0u!|&-\"|e@ .MuLG*+&H`PE؇~: 0"oY?H#go:|qاsQGXaDۣ$n=h\4Ͳ+!C"2.l::bt\UI, c)6f^Y_ab1|6>8Pc*epg&Mv*vP Y`]@`ի|ܲY] T! a<8 *eo9Hz:eIAȳpR$ Nz0 -,8*qaJUN1/nw#:+aaX&R; y->~{4}r7ޤ}!PӸ`%!R\<,[&:/W+|_՘hO|'+EMzSGZnN|mζ<`X :2!1*{)1gmavƮ5HY_Z'$$7b:쀌Go -Mt DǢn@ .qL=AX[RQhU^&\VpK *4Xq5Eh ] \8yɟzf( (8+Ex^тkפ-˗cA ,[6l`8|,y$gMIN/_`&6ZԃYf h9.UJK]m4K1\8 5.Qt7vݩ`"՛ow@zQJezY&ǖ=3]:h|0|_ 0&t6a A#r+aUʣV<[ {(yT؆ԇPMR.y]W >3NJ|ѕ|җGr 7$54"2hy4ҿfx/P G 'Q/kĬ|ihNHOF+3;6Yi^nm&{Xc0%@T,&$2-L.U &0DV͟߻54| |! lS ÆO<_;.qL" ŽʄBc7ёQ5|{Mw}Qs B ѸEhu99衅YB=UxCTϓ1?'?n0ڤQBK0;#}4 ?z>bB%%Quel¶< 0#U%"4m>`+Z|I t }S!C,_&ak涜߼uDXĬZ /2Fo plHñ 2nC J7)SZ!wgA9buP} vlٲOp}.JRⴴ @4KX3QCr0ZiU;. 
l*Ȱl"#(%x.a2pZr5`﷑\bVctTmkqVcwɂ.6eawlPC H`d%"ӽi*0 l@tTt )ӡ"&t(&*y/ptD 1 Nc5"ٶnB5** A-Aˌ h  l ] @Hva8X]&9"L`5d4@>t eA w'OM$j)Jq!uU.cj||zi݈vLFC7Ɇ"΋eϢ_9sģp #yDC5$#6ڎ1U:a"M td:*C Xe8P 0c}1a=9G> XVo&Va,{L\hpH8׷iIW/$o:C n*ȀE3܀F22.42< 4[%2<ӆ0NٵzFZ$[-6Oao^A'p \Z$\|k6\"WuQSAR9r<*aKϊvG޺!`U,:pFkxHOW&9l]-ƒ<4r"k~4φKGeN`q]G]1Q^.X>$%i*HP=JF/8SeDUå.XuG"*۰ k#eb9<|%*~B7`Bi>ߍK'Um~,8mϺTzYKaQP.vo `k7 <)l`7.kRǁV {sT[VTXP9l!Yi?PP/ⶶk;Ni!]Tt鏒RL9*= A q#p:Q݉"X.ڄaa\52ץ,/FF"wWZwŒ)Gb_ ]fؤ0AK@ߌ9!:)ik`5ѶD¯?.qBF ʤu+U Sܬ"*섒VJoS[YAfq2z2D[k\.=ITLԋB xIP xpPЌt\tF*V* 綌5omH⮵W{}/va+IR34fo~`dI|y y̰-5;"|L2`o*aJT+NͷNǘ[&䊓9|6_o 6η恋WqcJ`79/ܵBa."\ç =7Tj܏V㸀o$I88h _+/zy/Vt"t.[e|TW?&@J_\cjXmtoE W˲Y!\˳w]52AtE8P C#&U.0ܖK]{/:2 G*@ p(t=Q*h23N״]ѱcu1_tbl'bd)P[rIENDB`$platforme*Rە!YRe*ەe*<<0x86_64-pc-linux-gnuarchorme*Rە!YRe*ەe*<<0x86_64oshorme*Rە!YRe*ەe*<<0 linux-gnu"systemme*Rە!YRe*ەe*<<0x86_64, linux-gnustatusme*Rە!YRe*ەe*<<0majorme*Rە!YRe*ەe*<<03minorme*Rە!YRe*ەe*<<05.0yearme*Rە!YRe*ەe*<<02018monthme*Rە!YRe*ەe*<<004dayhme*Rە!YRe*ەe*<<023svn.reve*Rە!YRe*ەe*<<074626languagee*Rە!YRe*ەe*<<0R-version.stringRە!YRe*ەe*<<0R version 3.5.0 (2018-04-23)nicknametringRە!YRe*ەe*<<0Joy in Playing
readstata13/vignettes/readstata13_basic_manual.Rmd0000644000176200001440000003566615002650213021666 0ustar liggesusers--- title: "readstata13: Basic Manual" author: "Jan Marvin Garbuszus & Sebastian Jeworutzki" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{readstata13: Basic Manual} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} library(readstata13) dir.create("res") options(rmarkdown.html_vignette.check_title = FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` The `readstata13` package was developed to address compatibility issues arising from changes in the Stata 13 dta file format. Prior to Stata 13, packages like `foreign` could handle dta files. However, Stata 13 introduced a new format that resembles XML.[^1] Recognizing the need for a new solution, we (Jan Marvin Garbuszus and Sebastian Jeworutzki) created `readstata13`. Leveraging Rcpp for performance, the package has evolved into a comprehensive tool for working with dta files in R. [^1]: The dta format for current versions is well documented at <https://www.stata.com/help.cgi?dta> and also in the corresponding manuals. Key features of `readstata13` include: * **Broad Format Support:** Ability to import and export dta files across a wide range of Stata versions, including many undocumented formats. * **Handling Advanced Features:** Support for features like string encoding, multilingual labels, business calendars, long strings (`strL`), frames, and embedded binary data. * **Enhanced Functionality:** Built as a direct replacement for `foreign`'s dta functions, with added capabilities for improved label handling (including generation) and partial data reading (selecting specific rows or variables). ## Core Functionality: Reading and Writing Stata files Importing a Stata file using `readstata13` is straightforward, similar to using the `foreign` package. The primary function is `read.dta13`.
To save an R data frame to the Stata dta format, you use the `save.dta13` function. ```{R} data (cars) # Save the 'cars' dataset to a Stata file save.dta13(cars, file = "res/cars.dta") # Read the saved Stata file back into R dat <- read.dta13("res/cars.dta") ``` Beyond the data itself, `readstata13` preserves important metadata from the Stata file. This information is stored as attributes of the imported data frame. ```{R} # prints the attributes attributes(dat) ``` Examining the attributes reveals details such as the Stata format version (e.g., format 117, introduced in Stata 13), a data label, a timestamp, and information about the data types and formats used in Stata. In this example, the `save.dta13` function wrote the numeric data from R as binary `double`s in the dta file. The byte order (endianness) is also recorded; `readstata13` is designed to handle both Little Endian (used here) and Big Endian formats during reading and writing.[^2] [^2]: A detailed explanation can be found here: . The package automatically manages the conversion of Stata's missing values, value labels, and variable labels during both import and export. ## Supported Stata Versions A key advantage of `readstata13` is its ability to write dta files compatible with older and newer versions of Stata. This is controlled using the `version` argument in the `save.dta13` function. 
The table below lists supported Stata versions and their corresponding file formats: | Stata Version | File Format | |---------------|-------------| | 18 - 19 | 121 | | 18 - 19 | 120 | | 15 - 19 | 119 | | 14 - 19 | 118 | | 13 | 117 | | 12 | 115 | | 10 - 11 | 114 | | 8 - 9 | 113 | | 7 | 110 | | 6 | 108 | While this table shows the most common formats, `readstata13` supports reading files from Stata version 1 (format 102) up to the latest format 121 (used for files with over 32,767 variables, readable by Stata 18 & 19 MP).[^4] The dta format has evolved over time to accommodate larger datasets and longer variable names or labels. Although `readstata13` can read virtually any format, its ability to write files that *fit* within Stata's historical limits depends on the data size. For general compatibility, it's recommended to target versions 7 or later (formats 110+), which aligns with the default in `foreign::write.dta`. [^4]: A [development branch](https://github.com/sjewo/readstata13/tree/116) on GitHub even includes support for the rarely seen `116` format, for which only one public sample file is known to exist. Here's an example of saving a file compatible with Stata 7: ```{r} # Save the cars dataset as a Stata 7 dta file save.dta13(cars, "res/cars_version.dta", version = 7) # Read the file back and check its reported version dat3 <- read.dta13("res/cars_version.dta") attr(dat3, "version") ```
```{r} library(readstata13) x <- read.dta13(system.file("extdata/statacar.dta", package = "readstata13"), convert.factors = FALSE) ``` Variable labels are accessible via the `var.labels` attribute: ```{r} attr(x, "var.labels") ``` You can retrieve the label for a specific variable using the `varlabel()` function: ```{r} varlabel(x, var.name = "type") ``` Value labels, which map numeric codes to descriptive text, are stored in a more structured way. The `val.labels` attribute indicates which variables have associated value labels. The actual label definitions (the mapping from codes to labels) are stored as a list in the `label.table` attribute. In our example dataset, only one column has value labels: ```{r} attr(x, "val.labels") ``` The corresponding label table for the 'type' variable is named `type_en`. It's a named vector where the numeric codes are the vector values and the labels are the names: ```{r} attr(x, "label.table")$type_en ``` Convenience functions like `get.label.name()` and `get.label()` provide alternative ways to access this information: ```{r} get.label.name(x, var.name = "type") get.label(x, "type_en") ``` A common task is converting a numeric variable with value labels into an R factor. `readstata13` simplifies this with the `set.label()` function, which uses the stored label information to create the factor levels. ```{r} # Create a factor variable 'type_en' from the 'type' variable using stored labels x$type_en <- set.label(x, "type") # Display the original numeric column and the new factor column x[, c("type", "type_en")] ``` ### Multi-Language Support for Labels Stata allows datasets to include labels in multiple languages. `readstata13` supports this, and the `lang` option in `set.label()` lets you specify which language's labels to use when creating a factor. 
```{r} # Check available languages and the default language get.lang(x) # Create a factor using the German labels x$type_de <- set.label(x, "type", lang = "de") # Display the original and both language factor columns x[, c("type", "type_en", "type_de")] ``` ### Compatibility with Other Packages `readstata13` is designed to integrate well with other R packages that work with labelled data, such as `labelled` and `expss`. ```{r, eval = isTRUE(requireNamespace("labelled"))} # Requires labelled package version > 2.8.0 due to a past bug library(labelled) # Read the data and convert to the 'labelled' class format xl <- read.dta13(system.file("extdata/statacar.dta", package = "readstata13"), convert.factors = FALSE) xl <- to_labelled(xl) xl ``` Packages like `expss` can utilize the label information stored by `readstata13` (and converted by `labelled`) for creating descriptive tables and plots. ```{r, eval = isTRUE(requireNamespace("expss")) & isTRUE(requireNamespace("labelled"))} library(expss) # Example: Use expss to create a table summarizing horse power by car brand # First, handle missing or negative HP values xl[xl$hp < 0 | is.na(xl$hp), "hp"] <- NA # Create the table using expss piping syntax xl %>% tab_cells(hp) %>% # Specify the variable for cells tab_cols(brand) %>% # Specify the variable for columns tab_stat_mean_sd_n() %>% # Calculate mean, standard deviation, and N tab_pivot() %>% # Pivot the table set_caption("Horse power by car brand.") # Add a caption ``` ## Handling Large Datasets As datasets grow, importing and managing them in memory can become challenging. `readstata13` provides features to work efficiently with large dta files. ### Partial Reading To avoid loading an entire large dataset when only a subset is needed, `readstata13` allows you to read specific rows or columns. This is particularly useful for exploring large files or extracting key variables without consuming excessive memory or time. 
```{r} # Read only the first 3 rows of the dataset dat_1 <- read.dta13("res/cars.dta", select.rows = c(1,3)); dat_1 # Read only the 'dist' variable from the dataset dat_2 <- read.dta13("res/cars.dta", select.cols = "dist"); head(dat_2) ``` A practical application of partial reading is working with large survey datasets like the SOEP (German Socio-Economic Panel).[^5] These datasets are often distributed across multiple files, structured like tables in a database. To link information across files, you need key identifier variables. Instead of importing entire multi-gigabyte files just to get a few ID columns, you can use `select.cols` to quickly and efficiently read only the necessary variables. [^5]: The SOEP is currently located at the [DIW Berlin](https://www.diw.de/). ### Compression When saving data to a dta file, you can use the `compress = TRUE` option in `save.dta13`. This instructs the package to use the smallest possible Stata data type for each variable, potentially reducing the file size. ```{r} # Save the cars dataset with compression enabled save.dta13(cars, file = "res/cars_compress.dta", compress = TRUE) # Import the compressed file and check the resulting data types dat2 <- read.dta13(file = "res/cars_compress.dta") attr(dat2, "types") ``` In this example, the `numeric` vector in R was safely stored as an `integer` in the compressed dta file because its values fit within the integer range. The main benefit of compression is the reduction in file size. The only notable change is that after re-import, the former `numeric` column has become an `integer`. ```{r} rbind(file.info("res/cars.dta")["size"], file.info("res/cars_compress.dta")["size"]) ``` ## Advanced Features ### Frames Stata version 16 introduced the concept of data [frames](https://www.stata.com/help.cgi?frames), allowing multiple datasets to be held in memory simultaneously and saved together in a ".dtas" file (a Stata frameset). 
A ".dtas" file is essentially a zip archive containing a separate dta file for each frame. The `get.frames` function in `readstata13` can inspect a ".dtas" file and list the names (defined within Stata), the internal filename and version of the frames it contains: ```{r} dtas_path <- system.file("extdata", "myproject2.dtas", package="readstata13") # Get information about frames in the .dtas file get.frames(dtas_path) ``` To import data from a ".dtas" file, use `read.dtas`. By default, it imports all frames and returns them as a named list of R data frames. ```{r} # Read all frames from the .dtas file read.dtas(dtas_path) ``` You can import only specific frames using the `select.frames` argument: ```{r} # Read only the "counties" frame read.dtas(dtas_path, select.frames = "counties") ``` Furthermore, you can apply specific `read.dta13` options to individual frames within the ".dtas" file by providing a list to the `read.dta13.options` argument. The list structure should be `list(framename = list(param = value))`. ```{r} # Read frames with different column selections for each read.dtas(dtas_path, read.dta13.options = list(counties = list(select.cols = "median_income"), persons = list(select.cols = "income"))) ``` ### Long Strings (strL) and Binary Data Stata 13 introduced "long strings" (`strL`), capable of storing very large text values. These are stored separately from the main data matrix in the dta file, with only a reference kept in the data part. `readstata13` handles these; by default, they are read into R character vectors. Interestingly, Stata also allows embedding binary data (like images, audio, or other files) within `strL` variables.[^6] While R's standard data structures aren't ideal for directly handling such embedded binary data within a data frame,[^7] `readstata13` version `0.9.1` and later provides the `strlexport` option to extract these binary contents to files. 
[^6]: A Stata blog post illustrates this feature, showing how physicians could store X-ray images alongside patient data: ["In the spotlight: Storing long strings and entire files in Stata datasets"](https://www.stata.com/stata-news/news31-4/spotlight/). [^7]: The challenge lies in R's vector types; standard character vectors aren't designed for arbitrary binary data, and there's no native vector type for image processing or other binary formats within a data frame context. This also means `readstata13` currently cannot create dta files *with* embedded binary data from R. Using `strlexport = TRUE` and specifying a path with `strlpath`, you can save the contents of `strL` variables as separate files in a designated directory. ```{r} # Create a directory for exporting strLs dir.create("res/strls/") # Read a dta file containing strLs and export their content dat_strl <- read.dta13("stata_strl.dta", strlexport = TRUE, strlpath = "res/strls/") # List the files created in the export directory. # The filenames indicate the variable and observation index (e.g., 15_1). dir("res/strls/") ``` The exported files do not have extensions because the file type is not inherently known from the `strL` data itself (and could vary cell by cell). The user is responsible for determining the correct file type and processing the content. In this example, the first exported file (`15_1`) is a text file. ```{r} # Read the content of the text file strL export readLines("res/strls/15_1") ``` The second file (`16_1`) is a PNG image. You can read and display it using appropriate R packages like `png` and `grid`. 
```{r, fig.alt="Display of the R logo extracted from a long string."} library(png) library(grid) # grid is needed for grid.raster # Read the PNG image file img <- readPNG("res/strls/16_1") # Display the image grid::grid.raster(img) ``` ```{r include=FALSE} # Clean up the created directory and files unlink("res/", recursive = TRUE) ``` readstata13/src/0000755000176200001440000000000015002660533013155 5ustar liggesusersreadstata13/src/RcppExports.cpp0000644000176200001440000000600215002626101016141 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // stata_read List stata_read(const char * filePath, const bool missing, const IntegerVector selectrows, const CharacterVector selectcols_chr, const IntegerVector selectcols_int, const bool strlexport, const CharacterVector strlpath); RcppExport SEXP _readstata13_stata_read(SEXP filePathSEXP, SEXP missingSEXP, SEXP selectrowsSEXP, SEXP selectcols_chrSEXP, SEXP selectcols_intSEXP, SEXP strlexportSEXP, SEXP strlpathSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const char * >::type filePath(filePathSEXP); Rcpp::traits::input_parameter< const bool >::type missing(missingSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type selectrows(selectrowsSEXP); Rcpp::traits::input_parameter< const CharacterVector >::type selectcols_chr(selectcols_chrSEXP); Rcpp::traits::input_parameter< const IntegerVector >::type selectcols_int(selectcols_intSEXP); Rcpp::traits::input_parameter< const bool >::type strlexport(strlexportSEXP); Rcpp::traits::input_parameter< const CharacterVector >::type strlpath(strlpathSEXP); rcpp_result_gen = Rcpp::wrap(stata_read(filePath, missing, selectrows, 
selectcols_chr, selectcols_int, strlexport, strlpath)); return rcpp_result_gen; END_RCPP } // stata_save int stata_save(const char * filePath, Rcpp::DataFrame dat); RcppExport SEXP _readstata13_stata_save(SEXP filePathSEXP, SEXP datSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const char * >::type filePath(filePathSEXP); Rcpp::traits::input_parameter< Rcpp::DataFrame >::type dat(datSEXP); rcpp_result_gen = Rcpp::wrap(stata_save(filePath, dat)); return rcpp_result_gen; END_RCPP } // stata_pre13_save int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat); RcppExport SEXP _readstata13_stata_pre13_save(SEXP filePathSEXP, SEXP datSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const char * >::type filePath(filePathSEXP); Rcpp::traits::input_parameter< Rcpp::DataFrame >::type dat(datSEXP); rcpp_result_gen = Rcpp::wrap(stata_pre13_save(filePath, dat)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { {"_readstata13_stata_read", (DL_FUNC) &_readstata13_stata_read, 7}, {"_readstata13_stata_save", (DL_FUNC) &_readstata13_stata_save, 2}, {"_readstata13_stata_pre13_save", (DL_FUNC) &_readstata13_stata_pre13_save, 2}, {NULL, NULL, 0} }; RcppExport void R_init_readstata13(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } readstata13/src/read.cpp0000644000176200001440000000405415002626101014570 0ustar liggesusers/* * Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #include using namespace Rcpp; // Reads the binary Stata file // // @param filePath The full systempath to the dta file you want to import. // @param missing logical if missings should be converted outside of Rcpp. // @import Rcpp // @export // [[Rcpp::export]] List stata_read(const char * filePath, const bool missing, const IntegerVector selectrows, const CharacterVector selectcols_chr, const IntegerVector selectcols_int, const bool strlexport, const CharacterVector strlpath) { FILE *file = NULL; // File pointer /* * Open the file in binary mode using the "rb" format string * This also checks if the file exists and/or can be opened for reading * correctly */ if ((file = fopen(filePath, "rb")) == NULL) throw std::range_error("Could not open specified file."); /* * check the first byte. */ std::string fbit(1, '\0'); readstring(fbit, file, fbit.size()); std::string expfbit = "<"; // create df List df(0); if (fbit.compare(expfbit) == 0) df = read_dta(file, missing, selectrows, selectcols_chr, selectcols_int, strlexport, strlpath); else df = read_pre13_dta(file, missing, selectrows, selectcols_chr, selectcols_int); fclose(file); return df; } readstata13/src/read_data.cpp0000644000176200001440000001437615002626101015571 0ustar liggesusers/* * Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #include "readstata.h" using namespace Rcpp; using namespace std; List read_data(FILE * file, const IntegerVector vartype_kk, const bool missing, const int8_t release, const uint64_t nn, uint32_t kk, const IntegerVector vartype_sj, const std::string byteorder, const bool swapit) { // 1. create the list List df(kk); for (uint32_t i=0; i0) & (type < 2046)) ? STATA_STR : type) { // double case STATA_DOUBLE: { double val_d = 0; val_d = readbin(val_d, file, swapit); if ((missing == 0) && !(val_d == R_NegInf) && ((val_dSTATA_DOUBLE_NA_MAX)) ) REAL(VECTOR_ELT(df,ii))[j] = NA_REAL; else REAL(VECTOR_ELT(df,ii))[j] = val_d; break; } // float case STATA_FLOAT: { float val_f = 0; val_f = readbin(val_f, file, swapit); if ((missing == 0) && ((val_fSTATA_FLOAT_NA_MAX)) ) REAL(VECTOR_ELT(df,ii))[j] = NA_REAL; else REAL(VECTOR_ELT(df,ii))[j] = val_f; break; } // long case STATA_INT: { int32_t val_l = 0; val_l = readbin(val_l, file, swapit); if ((missing == 0) && ((val_lSTATA_INT_NA_MAX)) ) INTEGER(VECTOR_ELT(df,ii))[j] = NA_INTEGER; else INTEGER(VECTOR_ELT(df,ii))[j] = val_l; break; } // int case STATA_SHORTINT: { int16_t val_i = 0; val_i = readbin(val_i, file, swapit); if ((missing == 0) && ((val_iSTATA_SHORTINT_NA_MAX)) ) INTEGER(VECTOR_ELT(df,ii))[j] = NA_INTEGER; else INTEGER(VECTOR_ELT(df,ii))[j] = val_i; break; } // byte case STATA_BYTE: { int8_t val_b = 0; val_b = readbin(val_b, file, swapit); if (missing == 0 && ( (val_bSTATA_BYTE_NA_MAX)) ) INTEGER(VECTOR_ELT(df,ii))[j] = NA_INTEGER; else INTEGER(VECTOR_ELT(df,ii))[j] = val_b; break; } // strings with 2045 or fewer characters case STATA_STR: { int32_t len = 0; len = 
vartype_sj[i]; std::string val_s (len, '\0'); readstring(val_s, file, val_s.size()); as(df[ii])[j] = val_s; break; } // string of any length case STATA_STRL: {// strL 2*4bit or 2 + 6 bit // FixMe: Strl in 118 switch (release) { case 117: { uint32_t v = 0, o = 0; v = readbin(v, file, swapit); o = readbin(o, file, swapit); stringstream val_stream; val_stream << v << '_' << o; string val_strl = val_stream.str(); as(df[ii])[j] = val_strl; break; } case 118: case 120: { int16_t v = 0; int64_t o = 0, z = 0; z = readbin(z, file, swapit); // works for LSF on little- and big-endian if (byteorder.compare("LSF")==0) { v = (int16_t)z; o = (z >> 16); } // works if we read a big-endian file on little-endian if (byteorder.compare("MSF")==0) { v = (z >> 48) & ((1 << 16) - 1); o = z & ((1 << 16) - 1); } stringstream val_stream; val_stream << v << '_' << o; string val_strl = val_stream.str(); as(df[ii])[j] = val_strl; break; } case 119: case 121: { int32_t v = 0; int64_t o = 0, z = 0; z = readbin(z, file, swapit); // works for LSF on little- and big-endian if (byteorder.compare("LSF")==0) { v = (int32_t)z & ((1 << 24) - 1); o = (z >> 24); } // FixMe: works if we read a big-endian file on little-endian if (byteorder.compare("MSF")==0) { v = (z >> 40) & ((1 << 24) - 1); o = z & ((1 << 24) - 1); } stringstream val_stream; val_stream << v << '_' << o; string val_strl = val_stream.str(); as(df[ii])[j] = val_strl; break; } } break; } case STATA_ALIAS: { break; // do nothing } // case < 0: // case STATA_ALIAS default: { // skip to the next valid case fseeko64(file, abs(type), SEEK_CUR); break; } } if (type >= 0) ii += 1; checkUserInterrupt(); } } return(df); } readstata13/src/save_pre13_dta.cpp0000644000176200001440000002761715002626101016467 0ustar liggesusers/* * Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free 
Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #include using namespace Rcpp; using namespace std; // Writes the binary Stata file // // @param filePath The full systempath to the dta file you want to export. // @param dat an R-Object of class data.frame. // @export // [[Rcpp::export]] int stata_pre13_save(const char * filePath, Rcpp::DataFrame dat) { uint16_t k = dat.size(); uint32_t n = dat.nrows(); int8_t byteorder = SBYTEORDER; string timestamp = dat.attr("timestamp"); timestamp.resize(18); string datalabel = dat.attr("datalabel"); datalabel[datalabel.size()] = '\0'; CharacterVector valLabels = dat.attr("vallabels"); CharacterVector nvarnames = dat.attr("names"); List chs = dat.attr("expansion.fields"); List formats = dat.attr("formats"); List labeltable = dat.attr("label.table"); List varLabels = dat.attr("var.labels"); List vartypes = dat.attr("types"); int8_t version = as(dat.attr("version")); fstream dta (filePath, ios::out | ios::binary); if (dta.is_open()) { uint32_t ndlabel = 81; uint32_t nformatslen = 49; uint32_t nvarnameslen = 33; uint32_t nvalLabelslen = 33; uint32_t nvarLabelslen = 81; uint32_t chlen = 33; uint32_t maxlabelsize = 32000; uint32_t maxstrsize = 244; if (version<111 || version==112) maxstrsize = 80; switch(version) { case 102: ndlabel = 30; nvarnameslen = 9; nformatslen = 7; nvalLabelslen = 9; nvarLabelslen = 32; break; case 103: case 104: ndlabel = 32; nvarnameslen = 9; nformatslen = 7; nvalLabelslen = 9; nvarLabelslen = 32; break; case 105: case 106:// unknown version (SE?) 
chlen = 9; ndlabel = 32; nvarnameslen = 9; nformatslen = 12; nvalLabelslen = 9; nvarLabelslen = 32; break; case 107: // unknown version (SE?) case 108: chlen = 9; nvarnameslen = 9; nformatslen = 12; nvalLabelslen = 9; case 110: case 111: case 112: case 113: nformatslen = 12; break; } writebin(version, dta, swapit); // format writebin(byteorder, dta, swapit); // LSF int8_t ft = 1; // filetype writebin(ft, dta, swapit); int8_t unused = 0; // unused writebin(unused, dta, swapit); writebin(k, dta, swapit); // nvars writebin(n, dta, swapit); // nobs /* write a datalabel */ if (datalabel.size() > ndlabel) Rcpp::warning("Datalabel too long. Resizing. Max size is %d.", ndlabel - 1); writestr(datalabel, ndlabel, dta); /* timestamp size is 17 */ if (version > 104) { if (timestamp.size() > 18) { Rcpp::warning("Timestamp too long. Dropping."); timestamp = ""; } writestr(timestamp, timestamp.size(), dta); } /* ... */ uint8_t nvartype; for (uint16_t i = 0; i < k; ++i) { nvartype = as(vartypes[i]); if(version<111 || version==112) { char c[2]; switch(nvartype) { case 255: strcpy(c, "d"); c[1] = '\0'; dta.write(c, 1); break; case 254: strcpy(c, "f"); c[1] = '\0'; dta.write(c, 1); break; case 253: strcpy(c, "l"); c[1] = '\0'; dta.write(c, 1); break; case 252: strcpy(c, "i"); c[1] = '\0'; dta.write(c, 1); break; case 251: strcpy(c,"b"); c[1] = '\0'; dta.write(c, 1); break; default: char d = char(nvartype+127); dta.write(&d, 1); break; } } else writebin(nvartype, dta, swapit); } /* ... */ for (uint16_t i = 0; i < k; ++i ) { string nvarname = as(nvarnames[i]); if (nvarname.size() > nvarnameslen) Rcpp::warning("Varname too long. Resizing. Max size is %d", nvarnameslen - 1); writestr(nvarname, nvarnameslen, dta); } /* ... */ uint32_t big_k = k+1; for (uint32_t i = 0; i < big_k; ++i) { uint16_t nsortlist = 0; writebin(nsortlist, dta, swapit); } /* ... 
*/ for (uint16_t i = 0; i < k; ++i ) { string nformats = as(formats[i]); if (nformats.size() > nformatslen) Rcpp::warning("Formats too long. Resizing. Max size is %d", nformatslen - 1); writestr(nformats, nformatslen, dta); } /* ... */ for (uint16_t i = 0; i < k; ++i ) { string nvalLabels = as(valLabels[i]); if (nvalLabels.size() > nvalLabelslen) Rcpp::warning("Vallabel too long. Resizing. Max size is %d", nvalLabelslen - 1); writestr(nvalLabels, nvalLabelslen, dta); } /* ... */ for (uint16_t i = 0; i < k; ++i) { string nvarLabels = ""; if (!Rf_isNull(varLabels) && Rf_length(varLabels) > 1) { nvarLabels = as(varLabels[i]); if (nvarLabels.size() > nvarLabelslen) Rcpp::warning("Varlabel too long. Resizing. Max size is %d", nvarLabelslen - 1); } writestr(nvarLabels, nvarLabelslen, dta); } /* ... */ if (version > 104) { int8_t datatype = 0; uint32_t len = 0; if (chs.size()>0) { for (int32_t i = 0; i(chs[i]); string ch1 = as(ch[0]); ch1[ch1.size()] = '\0'; string ch2 = as(ch[1]); ch2[ch2.size()] = '\0'; string ch3 = as(ch[2]); ch3[ch3.size()] = '\0'; len = chlen + chlen + ch3.size()+1; datatype = 1; writebin(datatype, dta, swapit); if(version<=108) writebin((int16_t)len, dta, swapit); else writebin(len, dta, swapit); writestr(ch1, chlen, dta); writestr(ch2, chlen, dta); writestr(ch3, ch3.size()+1, dta); } } // five bytes of zero end characteristics datatype = 0; len = 0; writebin(datatype, dta, swapit); if (version<=108) writebin((int16_t)len, dta, swapit); else writebin(len, dta, swapit); } /* ... 
*/ for(uint32_t j = 0; j < n; ++j) { for (uint16_t i = 0; i < k; ++i) { int const type = vartypes[i]; switch(type) { // store numeric as Stata double (double) case 255: { double val_d = 0; val_d = as(dat[i])[j]; if ( (val_d == NA_REAL) | R_IsNA(val_d) ) val_d = STATA_DOUBLE_NA; writebin(val_d, dta, swapit); break; } // float case 254: { double val_d = 0; float val_f = 0; val_d = as(dat[i])[j]; if ((val_d == NA_REAL) | (R_IsNA(val_d)) ) val_f = STATA_FLOAT_NA; else val_f = (float)(val_d); writebin(val_f, dta, swapit); break; } // store integer as Stata long (int32_t) case 253: { int32_t val_l = 0; val_l = as(dat[i])[j]; if ( (val_l == NA_INTEGER) | (R_IsNA(val_l)) ) { if(version>111) val_l = STATA_INT_NA; else val_l = STATA_INT_NA_108; } writebin(val_l, dta, swapit); break; } // int case 252: { int16_t val_i = 0; int32_t val_l = 0; val_l = as(dat[i])[j]; if (val_l == NA_INTEGER) val_i = STATA_SHORTINT_NA; else val_i = val_l; writebin(val_i, dta, swapit); break; } // byte case 251: { int8_t val_b = 0; int32_t val_l = 0; val_l = as(dat[i])[j]; if (val_l == NA_INTEGER) { if (version>104) val_b = STATA_BYTE_NA; else val_b = STATA_BYTE_NA_104; } else { val_b = val_l; } writebin(val_b, dta, swapit); break; } default: { int32_t len = vartypes[i]; CharacterVector cv_s = NA_STRING; cv_s = as(dat[i])[j]; std::string val_s = ""; if (cv_s[0] != NA_STRING) val_s = as(cv_s); // Stata 6-12 can only store 244 byte strings if(val_s.size()>maxstrsize) { Rcpp::warning("Character value too long. Resizing. Max size is %d.", maxstrsize); } writestr(val_s, len, dta); break; } } } } /* ... 
*/ if ((labeltable.size()>0) & (version>105)) { CharacterVector labnames = labeltable.attr("names"); int8_t padding = 0; for (int32_t i=0; i < labnames.size(); ++i) { int32_t txtlen = 0; string labname = as(labnames[i]); IntegerVector labvalue = labeltable[labname]; int32_t N = labvalue.size(); CharacterVector labelText = labvalue.attr("names"); IntegerVector off; /* * Fill off with offset position and create txtlen */ for (int32_t i = 0; i < labelText.size(); ++i) { string label = as(labelText[i]); uint32_t labellen = label.size()+1; if (labellen > maxlabelsize+1) labellen = maxlabelsize+1; txtlen += labellen; off.push_back ( txtlen-labellen ); } int32_t offI, labvalueI; int32_t nlen = sizeof(N) + sizeof(txtlen) + sizeof(offI)*N + sizeof(labvalueI)*N + txtlen; writebin(nlen, dta, swapit); writestr(labname, nvarnameslen, dta); writestr((char*)&padding, 3, dta); writebin(N, dta, swapit); writebin(txtlen, dta, swapit); for (int32_t i = 0; i < N; ++i) { offI = off[i]; writebin(offI, dta, swapit); } for (int32_t i = 0; i < N; ++i) { labvalueI = labvalue[i]; writebin(labvalueI, dta, swapit); } for (int32_t i = 0; i < N; ++i) { string labtext = as(labelText[i]); if (labtext.size() > maxlabelsize) { Rcpp::warning("Label too long. Resizing. Max size is %d", maxlabelsize); labtext.resize(maxlabelsize); // labtext[labtext.size()] = '\0'; } writestr(labtext, labtext.size()+1, dta); } } } dta.close(); return 0; } else { Rcpp::stop("Unable to open file."); return -1; } } readstata13/src/save_dta.cpp0000644000176200001440000004526115002626101015450 0ustar liggesusers/* * Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #include using namespace Rcpp; using namespace std; // // create big endian file from little endian // #ifdef swapit // #undef swapit // #undef sbyteorder // #undef SBYTEORDER // #define swapit TRUE // #define sbyteorder "MSF" // #define SBYTEORDER 1 // #endif // Writes the binary Stata file // // @param filePath The full systempath to the dta file you want to export. // @param dat an R-Object of class data.frame. // @export // [[Rcpp::export]] int stata_save(const char * filePath, Rcpp::DataFrame dat) { uint32_t k = dat.size(); uint64_t n = dat.nrows(); const string timestamp = dat.attr("timestamp"); string datalabel = dat.attr("datalabel"); datalabel[datalabel.size()] = '\0'; CharacterVector valLabels = dat.attr("vallabels"); CharacterVector nvarnames = dat.attr("names"); List chs = dat.attr("expansion.fields"); List formats = dat.attr("formats"); List labeltable = dat.attr("label.table"); List varLabels = dat.attr("var.labels"); List vartypes = dat.attr("types"); const string version = dat.attr("version"); uint8_t const release = atoi(version.c_str()); uint8_t nformatslen = 0, ntimestamp = 0; uint16_t nvarnameslen = 0, nvarLabelslen = 0, nvalLabelslen = 0, ndlabel = 0, lbllen = 0; uint32_t chlen = 0, maxdatalabelsize = 0, maxlabelsize = 32000; switch (release) { case 117: nvarnameslen = 33; nformatslen = 49; nvalLabelslen = 33; nvarLabelslen = 81; maxdatalabelsize = 80; chlen = 33; lbllen = 33; break; case 118: case 119: case 120: case 121: nvarnameslen = 129; nformatslen = 57; nvalLabelslen = 129; nvarLabelslen = 321; maxdatalabelsize = 320; // in utf8 4 * 80 byte chlen = 129; lbllen = 129; break; } const 
string head = "
"; const string byteord = ""; const string K = ""; const string num = ""; const string lab = ""; const string endheader = "
"; const string startmap = ""; const string endmap = ""; const string startvart = ""; const string endvart = ""; const string startvarn = ""; const string endvarn = ""; const string startsor = ""; const string endsor = ""; const string startform = ""; const string endform = ""; const string startvalLabel = ""; const string endvalLabel = ""; const string startvarlabel= ""; const string endvarlabel= ""; const string startcharacteristics = ""; const string endcharacteristics = ""; const string startch = ""; const string endch = ""; const string startdata = ""; const string enddata = ""; const string startstrl = ""; const string endstrl = ""; const string startvall = ""; const string endvall = ""; const string startlbl = ""; const string endlbl = ""; string end = "
"; end[end.size()] = '\0'; fstream dta (filePath, ios::out | ios::binary); if (dta.is_open()) { /* Stata 13 uses to store 14 byte positions in a dta-file. This * vector is now created and filled with the correct map positions. At * the end of the creation process, all 14 values are known and map will * be filled with the correct values. */ NumericVector map(14); map(0) = dta.tellg(); writestr(head, head.size(), dta); writestr(version, 3, dta); // 117|118 (e.g. Stata 13|14) writestr(byteord, byteord.size(), dta); writestr(sbyteorder, 3, dta); // LSF writestr(K, K.size(), dta); if (release < 119 || release == 120) writebin((int16_t)k, dta, swapit); if (release == 119 || release == 121) writebin(k, dta, swapit); writestr(num, num.size(), dta); if (release == 117) writebin((int32_t)n, dta, swapit); if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) writebin(n, dta, swapit); writestr(lab, lab.size(), dta); /* write a datalabel */ if (!datalabel.empty()) { if (datalabel.size() > maxdatalabelsize) { Rcpp::warning("Datalabel to long. Resizing. 
Max size is %d.", maxdatalabelsize); datalabel.resize(maxdatalabelsize); datalabel[datalabel.size()] = '\0'; } ndlabel = datalabel.size(); if (release == 117) writebin((uint8_t)ndlabel, dta, swapit); if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) writebin(ndlabel, dta, swapit); writestr(datalabel,datalabel.size(), dta); } else { // empty data label defined by byte(s) of zero uint8_t zero = 0; if (release == 117) { writebin(zero, dta, swapit); } if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) { writebin(zero, dta, swapit); writebin(zero, dta, swapit); } } /* timestamp size is 0 (= no timestamp) or 17 */ writestr(timest, timest.size(), dta); if (!timestamp.empty()) { ntimestamp = 17; writebin(ntimestamp, dta, swapit); writestr(timestamp, timestamp.size(), dta); }else{ writebin(ntimestamp, dta, swapit); } writestr(endheader, endheader.size(), dta); /* ... */ map(1) = dta.tellg(); writestr(startmap, startmap.size(), dta); for (int32_t i = 0; i <14; ++i) { uint64_t nmap = 0; writebin(nmap, dta, swapit); } writestr(endmap, endmap.size(), dta); /* ... */ map(2) = dta.tellg(); writestr(startvart, startvart.size(), dta); uint16_t nvartype; for (uint32_t i = 0; i < k; ++i) { nvartype = as(vartypes[i]); writebin(nvartype, dta, swapit); } writestr(endvart, endvart.size(), dta); /* ... */ map(3) = dta.tellg(); writestr(startvarn, startvarn.size(), dta); for (uint32_t i = 0; i < k; ++i ) { string nvarname = as(nvarnames[i]); nvarname[nvarname.size()] = '\0'; if (nvarname.size() > nvarnameslen) Rcpp::warning("Varname to long. Resizing. Max size is %d", nvarnameslen - 1); writestr(nvarname, nvarnameslen, dta); } writestr(endvarn, endvarn.size(), dta); /* ... 
*/ map(4) = dta.tellg(); writestr(startsor, startsor.size(), dta); uint64_t big_k = k+1; for (uint64_t i = 0; i < big_k; ++i) { uint32_t nsortlist = 0; if ((release == 117) | (release == 118) | (release == 120)) { writebin((uint16_t)nsortlist, dta, swapit); } if ((release == 119) | (release == 121)) { writebin(nsortlist, dta, swapit); } } writestr(endsor, endsor.size(), dta); /* ... */ map(5) = dta.tellg(); writestr(startform, startform.size(), dta); for (uint32_t i = 0; i < k; ++i ) { string nformats = as(formats[i]); if (nformats.size() >= nformatslen) Rcpp::warning("Formats to long. Resizing. Max size is %d", nformatslen); writestr(nformats, nformatslen, dta); } writestr(endform, endform.size(), dta); /* ... */ map(6) = dta.tellg(); writestr(startvalLabel, startvalLabel.size(), dta); for (uint32_t i = 0; i < k; ++i) { string nvalLabels = as(valLabels[i]); nvalLabels[nvalLabels.size()] = '\0'; if (nvalLabels.size() > nvalLabelslen) Rcpp::warning("Vallabel to long. Resizing. Max size is %d", nvalLabelslen - 1); writestr(nvalLabels, nvalLabelslen, dta); } writestr(endvalLabel, endvalLabel.size(), dta); /* ... */ map(7) = dta.tellg(); writestr(startvarlabel, startvarlabel.size(), dta); for (uint32_t i = 0; i < k; ++i) { if (!Rf_isNull(varLabels) && Rf_length(varLabels) > 1) { string nvarLabels = as(varLabels[i]); if (nvarLabels.size() > nvarLabelslen) Rcpp::warning("Varlabel to long. Resizing. Max size is %d", nvarLabelslen - 1); nvarLabels[nvarLabels.size()] = '\0'; writestr(nvarLabels, nvarLabelslen, dta); } else { string nvarLabels = ""; nvarLabels[nvarLabels.size()] = '\0'; writestr(nvarLabels, nvarLabelslen, dta); } } writestr(endvarlabel, endvarlabel.size(), dta); /* ... */ map(8) = dta.tellg(); writestr(startcharacteristics, startcharacteristics.size(), dta); /* ... 
*/ if (chs.size()>0){ for (int32_t i = 0; i(chs[i]); string ch1 = as(ch[0]); ch1[ch1.size()] = '\0'; string ch2 = as(ch[1]); ch2[ch2.size()] = '\0'; string ch3 = as(ch[2]); ch3[ch3.size()] = '\0'; uint32_t nnocharacter = chlen*2 + ch3.size() +1; writebin(nnocharacter, dta, swapit); writestr(ch1, chlen, dta); writestr(ch2, chlen, dta); writestr(ch3,ch3.size()+1, dta); writestr(endch, endch.size(), dta); } } writestr(endcharacteristics, endcharacteristics.size(), dta); /* ... */ map(9) = dta.tellg(); writestr(startdata, startdata.size(), dta); IntegerVector V, O; CharacterVector STRL; for(uint64_t j = 0; j < n; ++j) { for (uint32_t i = 0; i < k; ++i) { int const type = vartypes[i]; switch(type < 2046 ? 2045 : type) { // store numeric as Stata double (double) case 65526: { double val_d = 0; val_d = as(dat[i])[j]; if ( (val_d == NA_REAL) | R_IsNA(val_d) | R_IsNaN(val_d) | std::isinf(val_d) ) val_d = STATA_DOUBLE_NA; writebin(val_d, dta, swapit); break; } // float case 65527: { double val_d = 0; float val_f = 0; val_d = as(dat[i])[j]; if ( (val_d == NA_REAL) | (R_IsNA(val_d)) | R_IsNaN(val_d) | std::isinf(val_d) ) val_f = STATA_FLOAT_NA; else val_f = (double)(val_d); writebin(val_f, dta, swapit); break; } // store integer as Stata long (int32_t) case 65528: { int32_t val_l = 0; val_l = as(dat[i])[j]; if ( (val_l == NA_INTEGER) | (R_IsNA(val_l)) | R_IsNaN(val_l) | std::isinf(val_l) ) val_l = STATA_INT_NA; writebin(val_l, dta, swapit); break; } // int case 65529: { int16_t val_i = 0; int32_t val_l = 0; val_l = as(dat[i])[j]; if (val_l == NA_INTEGER) val_i = STATA_SHORTINT_NA; else val_i = val_l; writebin(val_i, dta, swapit); break; } // byte case 65530: { int8_t val_b = 0; int32_t val_l = 0; val_l = as(dat[i])[j]; if (val_l == NA_INTEGER) val_b = STATA_BYTE_NA; else val_b = val_l; writebin(val_b, dta, swapit); break; } // str case 2045: { int32_t const len = vartypes[i]; CharacterVector cv_s = NA_STRING; cv_s = as(dat[i])[j]; std::string val_s = ""; if (cv_s[0] != 
NA_STRING) val_s = as(cv_s); writestr(val_s, len, dta); break; } // strL case 32768: { /* Stata uses +1 */ int64_t z = 0; CharacterVector cv_s = NA_STRING; cv_s = as(dat[i])[j]; std::string val_strl = ""; if (cv_s[0] != NA_STRING) val_strl = as(cv_s); if (!val_strl.empty()) { switch (release) { case 117: { uint32_t v = i+1, o = j+1; writebin(v, dta, swapit); writebin(o, dta, swapit); // push back every v, o and val_strl V.push_back(v); O.push_back(o); break; } case 118: case 120: { int16_t v = i+1; int64_t o = j+1; char z[8]; // push back every v, o and val_strl V.push_back(v); if (swapit) v = swap_endian(v); O.push_back(o); // z is 'vv-- ----' memcpy(&z[0], &v, sizeof(v)); if (SBYTEORDER == 1) { o <<= 16; if (swapit) o = swap_endian(o); } memcpy(&z[2], &o, 6); // z is 'vvoo oooo' dta.write((char*)&z, sizeof(z)); // writestr((char*)&z, sizeof(z), dta); break; } case 119: case 121: { int32_t v = i+1; int64_t o = j+1; char z[8]; // push back every v, o and val_strl V.push_back(v); O.push_back(o); // z is 'vvv- ----' if (SBYTEORDER == 1) { v <<= 8; if (swapit) v = swap_endian(v); } memcpy(&z[0], &v, 3); if (SBYTEORDER == 1) { o <<= 24; if (swapit) o = swap_endian(o); } memcpy(&z[3], &o, 5); // z is 'vvvo oooo' dta.write((char*)&z, sizeof(z)); // writestr((char*)&z, sizeof(z), dta); break; } } STRL.push_back(val_strl); } else { writestr((char*)&z, sizeof(z), dta); } break; } } } } writestr(enddata, enddata.size(), dta); /* ... */ map(10) = dta.tellg(); writestr(startstrl, startstrl.size(), dta); int32_t strlsize = STRL.length(); for(int i =0; i < strlsize; ++i ) { const string gso = "GSO"; int32_t v = V[i]; int64_t o = O[i]; uint8_t t = 129; //Stata binary type, no trailing zero. 
const string strL = as(STRL[i]); uint32_t len = strL.size(); writestr(gso, gso.size(), dta); writebin(v, dta, swapit); if (release == 117) writebin((uint32_t)o, dta, swapit); if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) writebin(o, dta, swapit); writebin(t, dta, swapit); writebin(len, dta, swapit); writestr(strL, strL.size(), dta); } writestr(endstrl, endstrl.size(), dta); /* ... */ map(11) = dta.tellg(); writestr(startvall, startvall.size(), dta); if (labeltable.size()>0) { CharacterVector labnames = labeltable.attr("names"); int8_t padding = 0; for (int32_t i=0; i < labnames.size(); ++i) { int32_t txtlen = 0; const string labname = as(labnames[i]); IntegerVector labvalue = labeltable[labname]; int32_t N = labvalue.size(); CharacterVector labelText = labvalue.attr("names"); IntegerVector off; /* * Fill off with offset position and create txtlen */ for (int32_t i = 0; i < labelText.size(); ++i) { string label = as(labelText[i]); uint32_t labellen = label.size()+1; if (labellen > maxlabelsize+1) labellen = maxlabelsize+1; txtlen += labellen; off.push_back ( txtlen-labellen ); } int32_t offI, labvalueI; int32_t nlen = sizeof(N) + sizeof(txtlen) + sizeof(offI)*N + sizeof(labvalueI)*N + txtlen; writestr(startlbl, startlbl.size(), dta); writebin(nlen, dta, swapit); writestr(labname, lbllen, dta); writestr((char*)&padding, 3, dta); writebin(N, dta, swapit); writebin(txtlen, dta, swapit); for (int32_t i = 0; i < N; ++i) { offI = off[i]; writebin(offI, dta, swapit); } for (int32_t i = 0; i < N; ++i) { labvalueI = labvalue[i]; writebin(labvalueI, dta, swapit); } for (int32_t i = 0; i < N; ++i) { string labtext = as(labelText[i]); if (labtext.size() > maxlabelsize) { Rcpp::warning("Label to long. Resizing. 
Max size is %d", maxlabelsize); labtext.resize(maxlabelsize); // labtext[labtext.size()] = '\0'; } writestr(labtext, labtext.size()+1, dta); } writestr(endlbl, endlbl.size(), dta); } } writestr(endvall, endvall.size(), dta); /* */ map(12) = dta.tellg(); writestr(end, end.size(), dta); /* end-of-file */ map(13) = dta.tellg(); /* seek up to to rewrite it*/ /* ... */ dta.seekg(map(1)); writestr(startmap, startmap.size(), dta); for (int i=0; i <14; ++i) { uint64_t nmap = 0; uint32_t hi = 0, lo = 0; nmap = map(i); hi = (nmap >> 32); lo = nmap; if (SBYTEORDER == 2) { // LSF writebin(lo, dta, swapit); writebin(hi, dta, swapit); } else { // MSF writebin(hi, dta, swapit); writebin(lo, dta, swapit); } } writestr(endmap, endmap.size(), dta); dta.close(); return 0; } else { throw std::range_error("Unable to open file."); return -1; } } readstata13/src/Makevars0000644000176200001440000000004515002626101014641 0ustar liggesusersPKG_CPPFLAGS = -I../inst/include -I. readstata13/src/read_pre13_dta.cpp0000644000176200001440000003354215002626101016436 0ustar liggesusers/* * Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . 
*/ #include "readstata.h" #include "read_data.h" using namespace Rcpp; using namespace std; List read_pre13_dta(FILE * file, const bool missing, const IntegerVector selectrows, const CharacterVector selectcols_chr, const IntegerVector selectcols_int) { int8_t release = 0; rewind(file); release = readbin(release, file, 0); if (release<102 || release == 109 || release>115) stop("First byte: Not a dta-file we can read."); IntegerVector versionIV(1); versionIV(0) = release; /* * byteorder is a 4 byte character e.g. "LSF". MSF referes to big-endian. */ uint16_t ndlabel = 81; uint8_t nvarnameslen = 33; int8_t nformatslen = 49; uint8_t nvalLabelslen = 33; uint16_t nvarLabelslen = 81; int32_t chlen = 33; uint8_t lbllen = 33; switch(release) { case 102: ndlabel = 30; nvarnameslen = 9; nformatslen = 7; nvalLabelslen = 9; nvarLabelslen = 32; break; case 103: case 104: ndlabel = 32; nvarnameslen = 9; nformatslen = 7; nvalLabelslen = 9; nvarLabelslen = 32; break; case 105: case 106: chlen = 9; ndlabel = 32; nvarnameslen = 9; nformatslen = 12; nvalLabelslen = 9; nvarLabelslen = 32; lbllen = 9; break; case 107: case 108: chlen = 9; nvarnameslen = 9; nformatslen = 12; nvalLabelslen = 9; lbllen = 9; break; case 110: case 111: case 112: case 113: nformatslen = 12; break; } CharacterVector byteorderC(1); IntegerVector byteorderI(1); bool swapit = 0; int8_t byteorder_i = 0; byteorder_i = readbin(byteorder_i, file, 0); // 1 = MSF 2 = LSF swapit = std::abs(SBYTEORDER-byteorder_i); byteorderI(0) = byteorder_i; std::string byteorder(3, '\0'); if (byteorder_i == 1) byteorder = "MSF"; else byteorder = "LSF"; // filetype: unknown? 
int8_t ft = 0; ft = readbin(ft, file, swapit); int8_t unused = 0; unused = readbin(unused, file, swapit); /* * Number of Variables */ uint16_t k = 0; k = readbin(k, file, swapit); /* * Number of Observations */ uint32_t n = 0; n = readbin(n, file, swapit); // dim to return original dim for partial read files IntegerVector dim(2); dim(0) = n; dim(1) = k; /* * A dataset may have a label e.g. "Written by R". * First we read its length (ndlabel), later the actual label (datalabel). * ndlabel: length of datalabel (excl. binary 0) * datalabel: string max length 80 */ CharacterVector datalabelCV(1); std::string datalabel(ndlabel, '\0'); if (ndlabel > 0) readstring(datalabel, file, datalabel.size()); else datalabel = ""; datalabelCV(0) = datalabel; CharacterVector timestampCV(1); std::string timestamp(18, '\0'); switch (release) { case 102: case 103: case 104: { timestamp = ""; break; } default: { readstring(timestamp, file, timestamp.size()); break; } } timestampCV(0) = timestamp; /* * vartypes. * 0-2045: strf (String: Max length 2045) * 32768: strL (long String: Max length 2 billion) * 65526: double * 65527: float * 65528: long * 65529: int * 65530: byte */ IntegerVector vartype(k); switch (release) { case 102: case 103: case 104: case 105: case 106: case 107: case 108: case 110: case 112: { uint8_t nvartypec = 0; for (uint16_t i=0; i127) vartype[i] = nvartypec - 127; } break; } case 111: case 113: case 114: case 115: { uint8_t nvartype = 0; for (uint16_t i=0; i ... 
*/ List ch = List(); if (release > 104) { int8_t datatype = 0; uint32_t len = 0; datatype = readbin(datatype, file, swapit); if (release <= 108) len = readbin((uint16_t)len, file, swapit); else len = readbin(len, file, swapit); while (!(datatype==0) && !(len==0)) { std::string chvarname(chlen, '\0'); std::string chcharact(chlen, '\0'); std::string nnocharacter(len-chlen*2, '\0'); readstring(chvarname, file, chvarname.size()); readstring(chcharact, file, chcharact.size()); readstring(nnocharacter, file, nnocharacter.size()); // chs vector CharacterVector chs(3); chs[0] = chvarname; chs[1] = chcharact; chs[2] = nnocharacter; // add characteristics to the list ch.push_front( chs ); datatype = readbin(datatype, file, swapit); if (release <= 108) len = readbin((uint16_t)len, file, swapit); else len = readbin(len, file, swapit); } } /* * data. First a list is created with vectors. The vector type is defined by * vartype. Stata stores data columnwise so we loop over it and store the * data in the list of the first step. Third variable- and row-names are * attached and the list type is changed to data.frame. */ /* replace vartypes of Stata 8 - 12 with Stata 13 values. 
*/ // 117 contains new variable types (longer strings and strL) std::replace (vartype.begin(), vartype.end(), 251, STATA_BYTE); std::replace (vartype.begin(), vartype.end(), 252, STATA_SHORTINT); std::replace (vartype.begin(), vartype.end(), 253, STATA_INT); std::replace (vartype.begin(), vartype.end(), 254, STATA_FLOAT); std::replace (vartype.begin(), vartype.end(), 255, STATA_DOUBLE); uint64_t nmin = selectrows(0), nmax = selectrows(1); uint64_t nn = 0; // if selectrows is c(0,0) use full data if ((nmin == 0) && (nmax == 0)){ nmin = 1; nmax = n; } // make sure that n is not greater than nmax or nmin if (n < nmax) nmax = n; if (n < nmin) nmin = n; // sequences of column and row IntegerVector cvec = seq(0, (k-1)); IntegerVector rvec = seq(nmin, nmax); nn = rvec.size(); // use c indexing starting at 0 nmin = nmin -1; nmax = nmax -1; // calculate length of each variable stored in file. Calculate row length IntegerVector rlen = calc_rowlength(vartype); uint64_t rlength = sum(rlen); // check if vars are selected IntegerVector select = cvec, nselect; // select vars: either select every var or only matched cases. This will // return index positions of the selected variables. 
If non are selected the // index position is cvec // // name selection was passed to selectcols bool all_na_chr = all(is_na(selectcols_chr)); if (!all_na_chr) { select = choose(selectcols_chr, varnames); } // numeric selection was passed to selectcols bool all_na_int = all(is_na(selectcols_int)); if (!all_na_int) { IntegerVector seq_varnames = seq_along(varnames); select = choose(selectcols_int, seq_varnames); } // separate the selected from the not selected cases LogicalVector ll = is_na(select); nselect = cvec[ll == 1]; select = cvec[ll == 0]; uint32_t kk = select.size(); // shrink variables to selected size CharacterVector varnames_kk = varnames[select]; IntegerVector vartype_kk = vartype[select]; IntegerVector vartype_s = vartype; IntegerVector types_kk = types[select]; // replace not selected cases with their negative size values IntegerVector rlen2 = rlen[nselect]; rlen2 = -rlen2; vartype_s[nselect] = rlen2; // Use vartype_s to calulate jump IntegerVector vartype_sj = calc_jump(vartype_s); // 2. fill it with data // skip into the data part fseeko64(file, rlength * nmin, SEEK_CUR); List df = read_data(file, vartype_kk, missing, release, nn, kk, vartype_sj, byteorder, swapit); // skip to end of data part fseeko64(file, rlength * (n - nmax -1), SEEK_CUR); // 3. Create a data.frame df.attr("row.names") = rvec; df.attr("names") = varnames_kk; df.attr("class") = "data.frame"; /* * labels are separated by -tags. Labels may appear in any order e.g. * 2 "female" 1 "male 9 "missing". They are stored as tables. * nlen: length of label. * nlabname: label name. * labn: number of labels in this set (e.g. "male" "female" = 2) * txtlen: length of the label text. * off: offset defines where to read a new label in txtlen. 
*/ List labelList = List(); //put labels into this list if (release>105) { // FixMe: the while statement differs and the final check int32_t nlen = 0, labn = 0, txtlen = 0, noff = 0, val = 0; std::string tag(5, '\0'); bool haslabel = false; // length of value_label_table nlen = readbin(nlen, file, swapit); if (!(feof(file) || ferror(file))) haslabel = true; while(haslabel) { // name of this label set std::string nlabname(lbllen, '\0'); readstring(nlabname, file, nlabname.size()); //padding fseek(file, 3, SEEK_CUR); // value_label_table for actual label set labn = readbin(labn, file, swapit); txtlen = readbin(txtlen, file, swapit); // offset for each label // off0 : label 0 starts at off0 // off1 : label 1 starts at off1 ... IntegerVector off(labn); for (int i=0; i < labn; ++i) { noff = readbin(noff, file, swapit); off[i] = noff; } // needed for match IntegerVector laborder = clone(off); //laborder.erase(labn+1); IntegerVector labordersort = clone(off); //labordersort.erase(labn+1); std::sort(labordersort.begin(), labordersort.end()); // needs txtlen for loop off.push_back(txtlen); // sort offsets so we can read labels sequentially std::sort(off.begin(), off.end()); // create an index to sort labels along the code values // this is done while factor creation IntegerVector indx(labn); indx = match(laborder,labordersort); // code for each label IntegerVector code(labn); for (int i=0; i < labn; ++i) { val = readbin(val, file, swapit); code[i] = val; } // label text CharacterVector label(labn); for (int i=0; i < labn; ++i) { int lablen = off[i+1]-off[i]; std::string lab (lablen, '\0'); readstring(lab, file, lablen); label[i] = lab; } // sort labels according to indx CharacterVector labelo(labn); for (int i=0; i < labn; ++i) { labelo[i] = label[indx[i]-1]; } // create table for actual label set string const labset = nlabname; code.attr("names") = labelo; // add this set to output list labelList.push_front( code, labset); // length of value_label_table nlen = 
readbin(nlen, file, swapit); if (feof(file) || ferror(file)) break; } } /* * assign attributes to the resulting data.frame */ formats = formats[select]; valLabels = valLabels[select]; varLabels = varLabels[select]; df.attr("datalabel") = datalabelCV; df.attr("time.stamp") = timestampCV; df.attr("formats") = formats; df.attr("types") = types_kk; df.attr("val.labels") = valLabels; df.attr("var.labels") = varLabels; df.attr("version") = versionIV; df.attr("label.table") = labelList; df.attr("expansion.fields") = ch; df.attr("byteorder") = byteorderI; df.attr("orig.dim") = dim; return df; } readstata13/src/read_dta.cpp0000644000176200001440000004245515002626101015427 0ustar liggesusers/* * Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #include "readstata.h" #include "read_data.h" using namespace Rcpp; using namespace std; List read_dta(FILE * file, const bool missing, const IntegerVector selectrows, const CharacterVector selectcols_chr, const IntegerVector selectcols_int, const bool strlexport, const CharacterVector strlpath) { // stata_dta>
test("stata_dta>
", file); test("", file); /* * version is a 4 byte character e.g. "117" */ int8_t fversion = 117L; //f = first int8_t lversion = 121L; //l = last std::string version(3, '\0'); readstring(version, file, version.size()); int8_t const release = atoi(version.c_str()); IntegerVector versionIV(1); versionIV(0) = release; // check the release version. if (releaselversion) { warning("File version is %d.\nVersion: Not a version 13/14 dta-file", release); return -1; } uint8_t nvarnameslen = 0; int8_t nformatslen = 0; uint8_t nvalLabelslen = 0; uint16_t nvarLabelslen = 0; int32_t chlen = 0; uint8_t lbllen = 0; switch(release) { case 117: nvarnameslen = 33; nformatslen = 49; nvalLabelslen = 33; nvarLabelslen = 81; chlen = 33; lbllen = 33; break; case 118: case 119: case 120: case 121: nvarnameslen = 129; nformatslen = 57; nvalLabelslen = 129; nvarLabelslen = 321; chlen = 129; lbllen = 129; break; } // test("", file); test("", file); /* * byteorder is a 4 byte character e.g. "LSF". MSF refers to big-endian. */ std::string byteorder(3, '\0'); readstring(byteorder,file, byteorder.size()); // test("", file); test("", file); bool swapit = 0; swapit = strcmp(byteorder.c_str(), sbyteorder); /* * Number of Variables */ uint32_t k = 0; if (release < 119 || release == 120) k = readbin((uint16_t)k, file, swapit); if (release == 119 || release == 121) k = readbin(k, file, swapit); // test("", file); test("", file); /* * Number of Observations */ uint64_t n = 0; if (release == 117) n = readbin((uint32_t)n, file, swapit); if ((release >= 118) && (release <= 121)) n = readbin(n, file, swapit); // test("", file); test(" test("", file); test("", file); /* * A dataset may have a timestamp. If it has a timestamp the length of the * timestamp (ntimestamp) is 17. Else it is zero. 
* ntimestamp: 0 or 17 * timestamp: empty or 17 byte string */ uint8_t ntimestamp = 0; ntimestamp = readbin(ntimestamp, file, swapit); std::string timestamp(17, '\0'); if (ntimestamp == 17) // ntimestap is 0 or 17 { readstring(timestamp, file, timestamp.size()); } else { timestamp = ""; } CharacterVector timestampCV = timestamp; //
test("
", file); test("", file); /* * Stata stores the byteposition of certain areas of the file here. Currently * this is of no use to us. * 1. * 2. * 3. * 4. * 5. * 6. * 7. * 8. * 9. * 10. * 11. * 12. * 13. * 14. end-of-file */ NumericVector map(14); for (int i=0; i <14; ++i) { uint64_t nmap = 0; nmap = readbin(nmap, file, swapit); map[i] = nmap; } // test("
", file); test("", file); /* * vartypes. * 0-2045: strf (String: Max length 2045) * 32768: strL (long String: Max length 2 billion) * 65525: alias * 65526: double * 65527: float * 65528: long * 65529: int * 65530: byte */ IntegerVector vartype(k); for (uint32_t i=0; i test("", file); test("", file); /* * varnames. */ std::string nvarnames(nvarnameslen, '\0'); CharacterVector varnames(k); for (uint32_t i=0; i test("", file); test("", file); /* * sortlist. Stata stores the information which variable of a dataset was * sorted. Depending on byteorder sortlist is written differently. Currently we * do not use this information. * Vector size is k+1. */ uint64_t big_k = k+1; IntegerVector sortlist(big_k); for (uint64_t i=0; i test("", file); test("", file); /* * formats handle how Stata prints a variable. Currently we do not use this * information. */ std::string nformats(nformatslen, '\0'); CharacterVector formats(k); for (uint32_t i=0; i test("", file); test("",file); /* * value_label_names. Stata stores variable labels by names. * nvalLabels: length of the value_label_name * valLabels: */ std::string nvalLabels(nvalLabelslen, '\0'); CharacterVector valLabels(k); for (uint32_t i=0; i test("", file); test("", file); /* * variabel_labels */ std::string nvarLabels (nvarLabelslen, '\0'); CharacterVector varLabels(k); for (uint32_t i=0; i test("", file); test("", file); /* * characteristics. Stata can store additional information this way. It may * contain notes (for the dataset or a variable) or about label language sets. * Characteristics are not documented. We export them as attribute: * expansion.fields. Characteristics are separated by tags. 
Each has: * nocharacter: length of the characteristics * chvarname: varname (binary 0 terminated) * chcharact: characteristicsname (binary 0 terminated) * nnocharacter: contes (binary 0 terminated) */ std::string chtag = ""; std::string tago(4, '\0'); readstring(tago, file, tago.size()); List ch = List(); CharacterVector chs(3); while (chtag.compare(tago)==0) { uint32_t nocharacter = 0; nocharacter = readbin(nocharacter, file, swapit); std::string chvarname(chlen, '\0'); std::string chcharact(chlen, '\0'); std::string nnocharacter(nocharacter-chlen*2, '\0'); readstring(chvarname, file, chvarname.size()); readstring(chcharact, file, chcharact.size()); readstring(nnocharacter, file, nnocharacter.size()); // chs vector CharacterVector chs(3); chs[0] = chvarname; chs[1] = chcharact; chs[2] = nnocharacter; // add characteristics to the list ch.push_front( chs ); // test("", file); // read next tag readstring(tago, file, tago.size()); } //[ test("aracteristics>", file); test("", file); /* * data. First a list is created with vectors. The vector type is defined by * vartype. Stata stores data columnwise so we loop over it and store the * data in the list of the first step. Third variable- and row-names are * attached and the list type is changed to data.frame. */ uint64_t nmin = selectrows(0), nmax = selectrows(1); uint64_t nn = 0; // if selectrows is c(0,0) use full data if ((nmin == 0) && (nmax == 0)){ nmin = 1; nmax = n; } // make sure that n is not greater than nmax or nmin if (n < nmax) nmax = n; if (n < nmin) nmin = n; // sequences of column and row IntegerVector cvec = seq(0, (k-1)); IntegerVector rvec = seq(nmin, nmax); nn = rvec.size(); // use c indexing starting at 0 nmin = nmin -1; nmax = nmax -1; // calculate length of each variable stored in file. 
Calculate row length IntegerVector rlen = calc_rowlength(vartype); uint64_t rlength = sum(rlen); // check if vars are selected IntegerVector select = cvec, nselect; // select vars: either select every var or only matched cases. This will // return index positions of the selected variables. If non are selected the // index position is cvec // // name selection was passed to selectcols bool all_na_chr = all(is_na(selectcols_chr)); if (!all_na_chr) { select = choose(selectcols_chr, varnames); } // numeric selection was passed to selectcols bool all_na_int = all(is_na(selectcols_int)); if (!all_na_int) { IntegerVector seq_varnames = seq_along(varnames); select = choose(selectcols_int, seq_varnames); } // separate the selected from the not selected cases LogicalVector ll = is_na(select); nselect = cvec[ll == 1]; select = cvec[ll == 0]; uint32_t kk = select.size(); // shrink variables to selected size CharacterVector varnames_kk = varnames[select]; IntegerVector vartype_kk = vartype[select]; IntegerVector vartype_s = vartype; // replace not selected cases with their negative size values IntegerVector rlen2 = rlen[nselect]; rlen2 = -rlen2; vartype_s[nselect] = rlen2; // Use vartype_s to calculate jump IntegerVector vartype_sj = calc_jump(vartype_s); // 2. fill it with data // skip into the data part fseeko64(file, rlength * nmin, SEEK_CUR); List df = read_data(file, vartype_kk, missing, release, nn, kk, vartype_sj, byteorder, swapit); // skip to end of data part fseeko64(file, rlength * (n - nmax -1), SEEK_CUR); // 3. Create a data.frame df.attr("row.names") = rvec; df.attr("names") = varnames_kk; df.attr("class") = "data.frame"; // test("", file); test("", file); /* * strL. Stata 13 introduced long strings up to 2 billion characters. strLs are * separated by "GSO". * (v,o): Position in the data.frame. * t: 129/130 defines whether or not the strL is stored with a binary 0. * len: length of the strL. * strl: long string. 
*/ std::string gso = "GSO"; std::string tags(3, '\0'); readstring(tags, file, tags.size()); //put strLs into a named vector std::vector vec_strlvalues(0); std::vector vec_strlnames(0); while (gso.compare(tags)==0) { string ref; // FixMe: Strl in 118 switch (release) { case 117: { uint32_t v = 0, o = 0; v = readbin(v, file, swapit); o = readbin(o, file, swapit); stringstream val_stream; val_stream << v << '_' << o; ref.assign(val_stream.str()); break; } case 118: case 119: case 120: case 121: { uint32_t v = 0; uint64_t o = 0; v = readbin(v, file, swapit); o = readbin(o, file, swapit); stringstream val_stream; val_stream << v << '_' << o; ref.assign(val_stream.str()); break; } } // (129 = binary) | (130 = ascii) Note: // if 130 full len contains the string. if 130 len includes trailing \0. // that does not affect us. we read the full len, and if \0 occurs R // will print only the string up to that position. we write 129 uint8_t t = 0; t = readbin(t, file, swapit); uint32_t len = 0; len = readbin(len, file, swapit); std::string strl(len, '\0'); readstring(strl, file, strl.size()); // write strl to file. Stata allows binary files in strls if (strlexport) { std::string path = Rcpp::as(strlpath); std::string outputpath = path + "/" + ref; ofstream file1(outputpath.c_str(), ios::out | ios::binary); if (file1.good()) { file1.write(strl.c_str(), strl.size()); file1.close(); } else { Rcpp::Rcout << "strl export failed" << std::endl; } } vec_strlvalues.push_back( strl ); vec_strlnames.push_back( ref ); readstring(tags, file, tags.size()); } // set identifier as name CharacterVector strlvalues = wrap(vec_strlvalues); strlvalues.attr("names") = vec_strlnames; // after strls //[ test("trls>", file); test("", file); /* * labels are separated by -tags. Labels may appear in any order e.g. * 2 "female" 1 "male 9 "missing". They are stored as tables. * nlen: length of label. * nlabname: label name. * labn: number of labels in this set (e.g. 
"male" "female" = 2) * txtlen: length of the label text. * off: offset defines where to read a new label in txtlen. */ std::string lbltag = ""; std::string tag(5, '\0'); readstring(tag, file, tag.size()); List labelList = List(); //put labels into this list while (lbltag.compare(tag)==0) { int32_t nlen = 0, labn = 0, txtlen = 0, noff = 0, val = 0; // length of value_label_table nlen = readbin(nlen, file, swapit); // name of this label set std::string nlabname(lbllen, '\0'); readstring(nlabname, file, nlabname.size()); //padding fseek(file, 3, SEEK_CUR); // value_label_table for actual label set labn = readbin(labn, file, swapit); txtlen = readbin(txtlen, file, swapit); // offset for each label // off0 : label 0 starts at off0 // off1 : label 1 starts at off1 ... IntegerVector off(labn); for (int i=0; i < labn; ++i) { noff = readbin(noff, file, swapit); off[i] = noff; } // needed for match IntegerVector laborder = clone(off); //laborder.erase(labn+1); IntegerVector labordersort = clone(off); //labordersort.erase(labn+1); std::sort(labordersort.begin(), labordersort.end()); // needs txtlen for loop off.push_back(txtlen); // sort offsets so we can read labels sequentially std::sort(off.begin(), off.end()); // create an index to sort lables along the code values // this is done while factor creation IntegerVector indx(labn); indx = match(laborder,labordersort); // code for each label IntegerVector code(labn); for (int i=0; i < labn; ++i) { val = readbin(val, file, swapit); code[i] = val; } // label text CharacterVector label(labn); for (int i=0; i < labn; ++i) { int lablen = off[i+1]-off[i]; std::string lab (lablen, '\0'); readstring(lab, file, lablen); label[i] = lab; } // sort labels according to indx CharacterVector labelo(labn); for (int i=0; i < labn; ++i) { labelo[i] = label[indx[i]-1]; } // create table for actual label set string const labset = nlabname; code.attr("names") = labelo; // add this set to output list labelList.push_front( code, labset); fseek(file, 
6, SEEK_CUR); // readstring(tag, file, tag.size()); } /* * Final test if we reached the end of the file * close the file */ // [ test("ue_labels>", file); test("", file); /* * assign attributes to the resulting data.frame */ formats = formats[select]; valLabels = valLabels[select]; varLabels = varLabels[select]; df.attr("datalabel") = datalabelCV; df.attr("time.stamp") = timestampCV; df.attr("formats") = formats; df.attr("types") = vartype_kk; df.attr("val.labels") = valLabels; df.attr("var.labels") = varLabels; df.attr("version") = versionIV; df.attr("label.table") = labelList; df.attr("expansion.fields") = ch; df.attr("strl") = strlvalues; df.attr("byteorder") = wrap(byteorder); df.attr("orig.dim") = dim; return df; } readstata13/NAMESPACE0000644000176200001440000000113515002626101013576 0ustar liggesusers# Generated by roxygen2: do not edit by hand export("varlabel<-") export(as.caldays) export(get.frames) export(get.label) export(get.label.name) export(get.label.tables) export(get.lang) export(get.origin.codes) export(read.dta13) export(read.dtas) export(save.dta13) export(set.label) export(set.lang) export(stbcal) export(varlabel) import(Rcpp) importFrom(stats,complete.cases) importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(utils,download.file) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) importFrom(utils,unzip) useDynLib(readstata13, .registration = TRUE) readstata13/LICENSE0000644000176200001440000004315214372711643013410 0ustar liggesusersGNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. 
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
{description} Copyright (C) {year} {fullname} This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. {signature of Ty Coon}, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. 
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. readstata13/NEWS.md0000644000176200001440000001024615002657623013475 0ustar liggesusers## readstata13 0.11.0 - Initial support for Stata 18. Import .dtas files (Stata framesets) via `read.dtas()`. Alias variables are currently ignored with a warning. - The `select.cols` argument accepts either variable names or column indices. - Fix compilation on musl and other non-glibc based systems. - Add package alias to readstata13.Rd ## readstata13 0.10.1 - Fix writing `NA` and `NA_character_` values - Fix writing of STRLs on big endian systems ## readstata13 0.10.0 - Fix sortlist attribute for dta format 119 - Fix compress option. In the past, unwanted conversions to integer type could occur. - Fix encoding issues in variable and data labels - Fix for reading/writing of format 119 - Fix build on FreeBSD - New feature: improved handling of time and date formats - New feature: collect warnings from read.dta13 ## readstata13 0.9.2 - Fix build on OSX ## readstata13 0.9.1 - Allow reading only pre-selected variables - Experimental support for format 119 - Improve partial reading - Export of binary data from dta-files - New function get.label.tables() to show all Stata label sets - Fix check for duplicate labels - Fixes in set.lang ## readstata13 0.9.0 - Generate unique factor labels to prevent errors in factor definition - Check interrupt for long read - Fix storage size of character vectors in save.dta13 - Fix saving characters containing missings - Implement partial reading of dta-files - Fix an integer bug with saving data.frames of length requiring uint64_t ## readstata13 0.8.5 - Fix errors on big-endian systems ## readstata13 0.8.4 - Fix valgrind errors. 
converting from dta.write to writestr - Fix for empty data label - Make replace.strl default ## readstata13 0.8.3 - Restrict length of varnames to 32 chars for compatibility with Stata 14 - Stop compression of doubles as floats. Now test if compression of doubles as integer types is possible. - Add many function tests ## readstata13 0.8.2 - Save NA values in character vector as empty string - Convert.underscore=T will convert all non-literal characters to underscores - Fix saving of Dates - Save with convert.factors by default - Test for NaN and inf values while writing missing values and replace with NA - Remove message about saving factors ## readstata13 0.8.1 - Convert non-integer variables to factors (nonint.factors=T) - Working with strL variables is now a lot faster (thanks to Magnus Thor Torfason) - Fix handling of large datasets - Some code cleanups ## readstata13 0.8 - Implement reading all versions prior to 13. - Clean up code. - Fix a crash when varlabels do not match ncols. - Update leap seconds R code with foreign. ## readstata13 0.7.1 - Fix saving of files > 2GB ## readstata13 0.7 - read and write Stata 14 files (ver 118) - Fix save for variables without non-missing values - Read strings from different file encodings - Code cleanups ## readstata13 0.6.1 - Fix heap overflow ## readstata13 0.6 - Various fixes - Reading stbcal-files ## readstata13 0.5-3 - Write dta-files - Read/write LSF and MSF files - Source testing and cleaning - Support for multiple label languages (see http://www.stata.com/manuals13/dlabellanguage.pdf) - Additional tools for label handling ## readstata13 0.4 - Convert.dates from foreign::read.dta() - Handle different NA values - Convert strings to system encoding - Some checks on label assignment ## readstata13 0.3 - Reading file from url. Example: `read.dta13("http://www.stata-press.com/data/r13/auto.dta")` - Convert.underscore from foreign::read.dta(): converts _ to . - Missing.type parts from foreign::read.dta().
If TRUE return "missing" - New replace.strl argument to replace the reference to a STRL string in the data.frame with the actual value ## readstata13 0.2 - Read stata characteristics and save them in extension.table attribute - More robust handling of factor labels - Set file encoding for all strings and convert them to system encoding - Fixed compiler warnings ## readstata13 0.1 - Reading data files and create a data.frame - Assign variable names - Read the new strL strings and save them as attribute - Convert stata label to factors and save them as attribute - Read some meta data (timestamp, dataset label, formats,...) readstata13/inst/0000755000176200001440000000000015002660533013343 5ustar liggesusersreadstata13/inst/include/0000755000176200001440000000000015002626101014757 5ustar liggesusersreadstata13/inst/include/readstata.h0000644000176200001440000001363615002626101017111 0ustar liggesusers/* * Copyright (C) 2015-2024 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . 
*/ #ifndef READSTATA_H #define READSTATA_H // check for 1.0.8.0 #if RCPP_DEV_VERSION >= 1000800 #include #else #include #endif #include #include #include #include #define GCC_VERSION (__GNUC__ * 10000 \ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) /* Test for GCC < 4.9.0 */ #if GCC_VERSION < 40900 & !__clang__ typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; #else #include #endif #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) || defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && !defined(__GLIBC__)) # define fseeko64 fseeko #endif #include "read_dta.h" #include "read_pre13_dta.h" #include "statadefines.h" #include "swap_endian.h" template T readbin( T t , FILE * file, bool swapit) { if (fread(&t, sizeof(t), 1, file) != 1) { if (feof(file)) return 0; // this is expected after reading the labeltable } else if (ferror(file)){ Rcpp::warning("num: a binary read error occurred."); } if (swapit==0) return(t); else return(swap_endian(t)); } template T readuint48( T t , FILE * file, bool swapit) { char uint48[6]; if (fread(uint48, sizeof(uint48), 1, file) != 1) { if (feof(file)) return 0; // this is expected after reading the labeltable } else if (ferror(file)){ Rcpp::warning("num: a binary read error occurred."); } t = *(uint64_t *)&uint48; if (swapit==0) return(t); else return(swap_endian(t)); } static void readstring(std::string &mystring, FILE * fp, int nchar) { if (!fread(&mystring[0], nchar, 1, fp)) Rcpp::warning("char: a binary read error occurred"); } inline void test(std::string testme, FILE * file) { std::string test(testme.size(), '\0'); readstring(test,file, test.size()); if (testme.compare(test)!=0) { fclose(file); Rcpp::warning("\n testme:%s \n test: %s\n", testme.c_str(), test.c_str()); Rcpp::stop("When attempting to read %s: Something went wrong!", 
testme.c_str()); } } template static void writebin(T t, std::fstream& dta, bool swapit) { if (swapit==1){ T t_s = swap_endian(t); dta.write((char*)&t_s, sizeof(t_s)); } else { dta.write((char*)&t, sizeof(t)); } } template static void writestr(std::string val_s, T len, std::fstream& dta) { std::stringstream val_stream; val_stream << std::left << std::setw(len) << std::setfill('\0') << val_s; std::string val_strl = val_stream.str(); dta.write(val_strl.c_str(),val_strl.length()); } inline Rcpp::IntegerVector calc_rowlength(Rcpp::IntegerVector vartype) { uint32_t k = vartype.size(); Rcpp::IntegerVector rlen(k); // calculate row length in byte for (uint32_t i=0; i inline Rcpp::IntegerVector choose(T x, T y) { // ToDo: Maybe we can skip the select and nselect in read_dta.cpp if we match // the other way around and use Rcpp::is_na on the result which then could be // used as an additional index Rcpp::IntegerVector mm = Rcpp::match(x, y); if (Rcpp::any(Rcpp::is_na(mm))) { Rcpp::LogicalVector ll = !Rcpp::is_na(mm); Rcpp::CharacterVector ms = Rcpp::as(x[ll==0]); // does not work if ms contains multiple names: Rcpp::as(ms) Rcpp::Rcout << "selected.col " << ms << " was not found in dta-file." << std::endl; } // report position for found cases mm = Rcpp::match(y, x); return(mm); } // calculate the maximum jump. This calculates the maximum space we can skip if // reading only a single variable. Before we skipped over each variable. Now we // skip over them combined. Therefore if a value in x is positive push it // into a new vector. If negative, sum the length up. 
inline Rcpp::IntegerVector calc_jump(Rcpp::IntegerVector x) { Rcpp::IntegerVector y; int64_t val = 0; bool last = 0; uint32_t k = x.size(); for (uint32_t i=0; i 0) & (last == 0)) y.push_back(val); val = value; y.push_back(val); last = 1; } if ((i+1 == k) & (last == 0)) { y.push_back(val); } } return(y); } #endif readstata13/inst/include/statadefines.h0000644000176200001440000000451115002626101017603 0ustar liggesusers/* * Copyright (C) 2015-2023 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #ifndef STATADEFINES #define STATADEFINES /* Test for a little-endian machine */ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define sbyteorder "LSF" #define SBYTEORDER 2 #else #define sbyteorder "MSF" #define SBYTEORDER 1 #endif #define swapit FALSE /*Define missings*/ #define STATA_BYTE_NA_MIN -127 #define STATA_BYTE_NA_MAX +100 #define STATA_BYTE_NA +101 #define STATA_BYTE_NA_104 +127 // guess. 
#define STATA_SHORTINT_NA_MIN -32767 #define STATA_SHORTINT_NA_MAX +32740 #define STATA_SHORTINT_NA +32741 #define STATA_INT_NA_MIN -2147483647 #define STATA_INT_NA_MAX +2147483620 #define STATA_INT_NA +2147483621 #define STATA_INT_NA_108 2147483647 #define STATA_FLOAT_NA_MAX (1+15/pow(16.0,1)+15/pow(16.0,2)+15/pow(16.0,3)+15/pow(16.0,4)+15/pow(16.0,5)+14/pow(16.0,6))*pow(2.0,126) #define STATA_FLOAT_NA_MIN -STATA_FLOAT_NA_MAX #define STATA_FLOAT_NA 1+pow(2.0,127) #define STATA_DOUBLE_NA_MAX (1+15/pow(16.0,1)+15/pow(16.0,2)+15/pow(16.0,3)+15/pow(16.0,4)+15/pow(16.0,5)+15/pow(16.0,6)+15/pow(16.0,7)+15/pow(16.0,8)+15/pow(16.0,9)+15/pow(16.0,10)+15/pow(16.0,11)+15/pow(16.0,12)+15/pow(16.0,13))*pow(2.0,1022) #define STATA_DOUBLE_NA_MIN -1*(1+15/pow(16.0,1)+15/pow(16.0,2)+15/pow(16.0,3)+15/pow(16.0,4)+15/pow(16.0,5)+15/pow(16.0,6)+15/pow(16.0,7)+15/pow(16.0,8)+15/pow(16.0,9)+15/pow(16.0,10)+15/pow(16.0,11)+15/pow(16.0,12)+15/pow(16.0,13))*pow(2.0,1023) #define STATA_DOUBLE_NA pow(2.0,1023) #define STATA_BYTE 65530 #define STATA_SHORTINT 65529 #define STATA_INT 65528 #define STATA_FLOAT 65527 #define STATA_DOUBLE 65526 #define STATA_ALIAS 65525 #define STATA_STR 2045 #define STATA_SHORT_STR 244 #define STATA_STRL 32768 #endif readstata13/inst/include/read_pre13_dta.h0000644000176200001440000000202015002626101017677 0ustar liggesusers/* * Copyright (C) 2015 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. 
* * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #ifndef READPRE13DTA_H #define READPRE13DTA_H Rcpp::List read_pre13_dta(FILE * file, const bool missing, const Rcpp::IntegerVector selectrows, const Rcpp::CharacterVector selectcols_chr, const Rcpp::IntegerVector selectcols_int); #endif readstata13/inst/include/swap_endian.h0000644000176200001440000000224214372711643017437 0ustar liggesusers#ifndef SWAP_ENDIAN #define SWAP_ENDIAN /*#include */ #include #define GCC_VERSION (__GNUC__ * 10000 \ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) /* Test for GCC < 4.8.0 */ #if GCC_VERSION < 40800 & !__clang__ static inline unsigned short __builtin_bswap16(unsigned short a) { return (a<<8)|(a>>8); } #endif template T swap_endian(T t) { if (typeid(T) == typeid(int16_t)) return __builtin_bswap16(t); if (typeid(T) == typeid(uint16_t)) return __builtin_bswap16(t); if (typeid(T) == typeid(int32_t)) return __builtin_bswap32(t); if (typeid(T) == typeid(uint32_t)) return __builtin_bswap32(t); if (typeid(T) == typeid(int64_t)) return __builtin_bswap64(t); if (typeid(T) == typeid(uint64_t)) return __builtin_bswap64(t); union v { double d; float f; uint32_t i32; uint64_t i64; } val; if (typeid(T) == typeid(float)){ val.f = t; val.i32 = __builtin_bswap32(val.i32); return val.f; } if (typeid(T) == typeid(double)){ val.d = t; val.i64 = __builtin_bswap64(val.i64); return val.d; } else return t; } #endif readstata13/inst/include/read_dta.h0000644000176200001440000000212315002626101016671 0ustar liggesusers/* * Copyright (C) 2015 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #ifndef READDTA_H #define READDTA_H Rcpp::List read_dta(FILE * file, const bool missing, const Rcpp::IntegerVector selectrows, const Rcpp::CharacterVector selectcols_chr, const Rcpp::IntegerVector selectcols_int, const bool strlexport, const Rcpp::CharacterVector strlpath); #endif readstata13/inst/include/read_data.h0000644000176200001440000000215114372711643017052 0ustar liggesusers/* * Copyright (C) 2015 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . 
*/ #ifndef READDATA_H #define READDATA_H Rcpp::List read_data(FILE * file, const Rcpp::IntegerVector vartype_kk, const bool missing, const int8_t release, const uint64_t nn, uint32_t kk, const Rcpp::IntegerVector vartype_sj, const std::string byteorder, const bool swapit); #endif readstata13/inst/doc/0000755000176200001440000000000015002660533014110 5ustar liggesusersreadstata13/inst/doc/readstata13_basic_manual.R0000644000176200001440000001372715002660532021056 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- library(readstata13) dir.create("res") options(rmarkdown.html_vignette.check_title = FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- data (cars) # Save the 'cars' dataset to a Stata file save.dta13(cars, file = "res/cars.dta") # Read the saved Stata file back into R dat <- read.dta13("res/cars.dta") ## ----------------------------------------------------------------------------- # prints the attributes attributes(dat) ## ----------------------------------------------------------------------------- # Save the cars dataset as a Stata 7 dta file save.dta13(cars, "res/cars_version.dta", version = 7) # Read the file back and check its reported version dat3 <- read.dta13("res/cars_version.dta") attr(dat3, "version") ## ----------------------------------------------------------------------------- library(readstata13) x <- read.dta13(system.file("extdata/statacar.dta", package = "readstata13"), convert.factors = FALSE) ## ----------------------------------------------------------------------------- attr(x, "var.labels") ## ----------------------------------------------------------------------------- varlabel(x, var.name = "type") ## ----------------------------------------------------------------------------- attr(x, "val.labels") ## ----------------------------------------------------------------------------- attr(x, 
"label.table")$type_en ## ----------------------------------------------------------------------------- get.label.name(x, var.name = "type") get.label(x, "type_en") ## ----------------------------------------------------------------------------- # Create a factor variable 'type_en' from the 'type' variable using stored labels x$type_en <- set.label(x, "type") # Display the original numeric column and the new factor column x[, c("type", "type_en")] ## ----------------------------------------------------------------------------- # Check available languages and the default language get.lang(x) # Create a factor using the German labels x$type_de <- set.label(x, "type", lang = "de") # Display the original and both language factor columns x[, c("type", "type_en", "type_de")] ## ----eval = isTRUE(requireNamespace("labelled"))------------------------------ # Requires labelled package version > 2.8.0 due to a past bug library(labelled) # Read the data and convert to the 'labelled' class format xl <- read.dta13(system.file("extdata/statacar.dta", package = "readstata13"), convert.factors = FALSE) xl <- to_labelled(xl) xl ## ----eval = isTRUE(requireNamespace("expss")) & isTRUE(requireNamespace("labelled"))---- library(expss) # Example: Use expss to create a table summarizing horse power by car brand # First, handle missing or negative HP values xl[xl$hp < 0 | is.na(xl$hp), "hp"] <- NA # Create the table using expss piping syntax xl %>% tab_cells(hp) %>% # Specify the variable for cells tab_cols(brand) %>% # Specify the variable for columns tab_stat_mean_sd_n() %>% # Calculate mean, standard deviation, and N tab_pivot() %>% # Pivot the table set_caption("Horse power by car brand.") # Add a caption ## ----------------------------------------------------------------------------- # Read only the first 3 rows of the dataset dat_1 <- read.dta13("res/cars.dta", select.rows = c(1,3)); dat_1 # Read only the 'dist' variable from the dataset dat_2 <- read.dta13("res/cars.dta", 
select.cols = "dist"); head(dat_2) ## ----------------------------------------------------------------------------- # Save the cars dataset with compression enabled save.dta13(cars, file = "res/cars_compress.dta", compress = TRUE) # Import the compressed file and check the resulting data types dat2 <- read.dta13(file = "res/cars_compress.dta") attr(dat2, "types") ## ----------------------------------------------------------------------------- rbind(file.info("res/cars.dta")["size"], file.info("res/cars_compress.dta")["size"]) ## ----------------------------------------------------------------------------- dtas_path <- system.file("extdata", "myproject2.dtas", package="readstata13") # Get information about frames in the .dtas file get.frames(dtas_path) ## ----------------------------------------------------------------------------- # Read all frames from the .dtas file read.dtas(dtas_path) ## ----------------------------------------------------------------------------- # Read only the "counties" frame read.dtas(dtas_path, select.frames = "counties") ## ----------------------------------------------------------------------------- # Read frames with different column selections for each read.dtas(dtas_path, read.dta13.options = list(counties = list(select.cols = "median_income"), persons = list(select.cols = "income"))) ## ----------------------------------------------------------------------------- # Create a directory for exporting strLs dir.create("res/strls/") # Read a dta file containing strLs and export their content dat_strl <- read.dta13("stata_strl.dta", strlexport = TRUE, strlpath = "res/strls/") # List the files created in the export directory. # The filenames indicate the variable and observation index (e.g., 15_1). 
dir("res/strls/") ## ----------------------------------------------------------------------------- # Read the content of the text file strL export readLines("res/strls/15_1") ## ----fig.alt="Display of the R logo extracted from a long string."------------ library(png) library(grid) # grid is needed for grid.raster # Read the PNG image file img <- readPNG("res/strls/16_1") # Display the image grid::grid.raster(img) ## ----include=FALSE------------------------------------------------------------ # Clean up the created directory and files unlink("res/", recursive = TRUE) readstata13/inst/doc/readstata13_basic_manual.html0000644000176200001440000042663315002660533021626 0ustar liggesusers readstata13: Basic Manual

readstata13: Basic Manual

Jan Marvin Garbuszus & Sebastian Jeworutzki

2025-04-25

The readstata13 package was developed to address compatibility issues arising from changes in the Stata 13 dta file format. Prior to Stata 13, packages like foreign could handle dta files. However, Stata 13 introduced a new format that resembles XML.1 Recognizing the need for a new solution, we (Jan Marvin Garbuszus and Sebastian Jeworutzki) created readstata13. Leveraging Rcpp for performance, the package has evolved into a comprehensive tool for working with dta files in R.

Key features of readstata13 include:

  • Broad Format Support: Ability to import and export dta files across a wide range of Stata versions, including many undocumented formats.
  • Handling Advanced Features: Support for features like string encoding, multilingual labels, business calendars, long strings (strL), frames, and embedded binary data.
  • Enhanced Functionality: Built as a direct replacement for foreign’s dta functions, with added capabilities for improved label handling (including generation) and partial data reading (selecting specific rows or variables).

Core Functionality: Reading and Writing Stata files

Importing a Stata file using readstata13 is straightforward, similar to using the foreign package. The primary function is read.dta13. To save an R data frame to the Stata dta format, you use the save.dta13 function.

data (cars)

# Save the 'cars' dataset to a Stata file
save.dta13(cars, file = "res/cars.dta")

# Read the saved Stata file back into R
dat <- read.dta13("res/cars.dta")

Beyond the data itself, readstata13 preserves important metadata from the Stata file. This information is stored as attributes of the imported data frame.

# prints the attributes
attributes(dat)
#> $row.names
#>  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#> [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
#> 
#> $names
#> [1] "speed" "dist" 
#> 
#> $class
#> [1] "data.frame"
#> 
#> $datalabel
#> [1] "Written by R"
#> 
#> $time.stamp
#> [1] "25 Apr 2025 12:18"
#> 
#> $formats
#> [1] "%9.0g" "%9.0g"
#> 
#> $types
#> [1] 65526 65526
#> 
#> $val.labels
#>       
#> "" "" 
#> 
#> $var.labels
#> [1] "" ""
#> 
#> $version
#> [1] 117
#> 
#> $label.table
#> list()
#> 
#> $expansion.fields
#> list()
#> 
#> $byteorder
#> [1] "LSF"
#> 
#> $orig.dim
#> [1] 50  2
#> 
#> $data.label
#> character(0)

Examining the attributes reveals details such as the Stata format version (e.g., format 117, introduced in Stata 13), a data label, a timestamp, and information about the data types and formats used in Stata. In this example, the save.dta13 function wrote the numeric data from R as binary doubles in the dta file. The byte order (endianness) is also recorded; readstata13 is designed to handle both Little Endian (used here) and Big Endian formats during reading and writing.2

The package automatically manages the conversion of Stata’s missing values, value labels, and variable labels during both import and export.

Supported Stata Versions

A key advantage of readstata13 is its ability to write dta files compatible with older and newer versions of Stata. This is controlled using the version argument in the save.dta13 function. The table below lists supported Stata versions and their corresponding file formats:

Stata Version File Format
18 - 19 121
18 - 19 120
15 - 19 119
14 - 19 118
13 117
12 115
10 - 11 114
8 - 9 113
7 110
6 108

While this table shows the most common formats, readstata13 supports reading files from Stata version 1 (format 102) up to the latest format 121 (used for files with over 32,767 variables, readable by Stata 18 & 19 MP).3 The dta format has evolved over time to accommodate larger datasets and longer variable names or labels. Although readstata13 can read virtually any format, its ability to write files that fit within Stata’s historical limits depends on the data size. For general compatibility, it’s recommended to target versions 7 or later (formats 110+), which aligns with the default in foreign::write.dta.

Here’s an example of saving a file compatible with Stata 7:

# Save the cars dataset as a Stata 7 dta file
save.dta13(cars, "res/cars_version.dta", version = 7)

# Read the file back and check its reported version
dat3 <- read.dta13("res/cars_version.dta")
attr(dat3, "version")
#> [1] 110

Working with Labelled Data

Stata datasets often include rich metadata like variable and value labels. Since base R data frames don’t natively support this, readstata13 stores this information in various attributes of the imported data frame, mirroring the approach used by foreign::read.dta.

Let’s use the example dataset “statacar.dta” included with the readstata13 package. We’ll initially import it without converting categorical data to R factors, keeping the original numeric codes.

library(readstata13)
x <- read.dta13(system.file("extdata/statacar.dta", 
                            package = "readstata13"),
                convert.factors = FALSE)

Variable labels are accessible via the var.labels attribute:

attr(x, "var.labels")
#>  [1] "Numeric ID"             "Brand of car"           "Car model"             
#>  [4] "Car classification"     "Horse Power"            "Maximum speed"         
#>  [7] ""                       ""                       "Launch date"           
#> [10] "Launch date (calendar)" ""

You can retrieve the label for a specific variable using the varlabel() function:

varlabel(x, var.name = "type")
#>                 type 
#> "Car classification"

Value labels, which map numeric codes to descriptive text, are stored in a more structured way. The val.labels attribute indicates which variables have associated value labels. The actual label definitions (the mapping from codes to labels) are stored as a list in the label.table attribute.

In our example dataset, only one column has value labels:

attr(x, "val.labels")
#>                                 type_en                                         
#>        ""        ""        "" "type_en"        ""        ""        ""        "" 
#>                               
#>        ""        ""        ""

The corresponding label table for the ‘type’ variable is named type_en. It’s a named vector where the numeric codes are the vector values and the labels are the names:

attr(x, "label.table")$type_en
#>         min    Off-Road    Roadster    City car  Family car         max 
#> -2147483647           1           2           3           4  2147483620

Convenience functions like get.label.name() and get.label() provide alternative ways to access this information:

get.label.name(x, var.name = "type")
#>      type 
#> "type_en"
get.label(x, "type_en")
#>         min    Off-Road    Roadster    City car  Family car         max 
#> -2147483647           1           2           3           4  2147483620

A common task is converting a numeric variable with value labels into an R factor. readstata13 simplifies this with the set.label() function, which uses the stored label information to create the factor levels.

# Create a factor variable 'type_en' from the 'type' variable using stored labels
x$type_en <- set.label(x, "type")

# Display the original numeric column and the new factor column
x[, c("type", "type_en")]
#>          type    type_en
#> 1           2   Roadster
#> 2           4 Family car
#> 3           3   City car
#> 4           4 Family car
#> 5           1   Off-Road
#> 6           3   City car
#> 7  2147483620        max
#> 8 -2147483647        min

Multi-Language Support for Labels

Stata allows datasets to include labels in multiple languages. readstata13 supports this, and the lang option in set.label() lets you specify which language’s labels to use when creating a factor.

# Check available languages and the default language
get.lang(x)
#> Available languages:
#>  en
#>  de
#> 
#> Default language:
#>  en

# Create a factor using the German labels
x$type_de <- set.label(x, "type", lang = "de")

# Display the original and both language factor columns
x[, c("type", "type_en", "type_de")]
#>          type    type_en      type_de
#> 1           2   Roadster   Sportwagen
#> 2           4 Family car Familienauto
#> 3           3   City car    Stadtauto
#> 4           4 Family car Familienauto
#> 5           1   Off-Road Geländewagen
#> 6           3   City car    Stadtauto
#> 7  2147483620        max          max
#> 8 -2147483647        min          min

Compatibility with Other Packages

readstata13 is designed to integrate well with other R packages that work with labelled data, such as labelled and expss.

# Requires labelled package version > 2.8.0 due to a past bug
library(labelled)

# Read the data and convert to the 'labelled' class format
xl <- read.dta13(system.file("extdata/statacar.dta", 
                             package = "readstata13"),
                convert.factors = FALSE)

xl <- to_labelled(xl)
xl
#> # A tibble: 8 × 11
#>      id brand   model    type     hp         max  mileage  ecar ldate ldatecal  
#> * <int> <chr>   <chr>   <int>  <int>       <dbl>    <dbl> <int> <int> <date>    
#> 1     1 Meyer   Spee…  2   e0    150    1.77e  2  1.02e 1     0     1 2001-01-03
#> 2     2 Meyer   Happ…  4   e0     98    1.45e  2  5.60e 0     0   247 2001-12-31
#> 3     3 Akiko   Susu…  3   e0     45    1.19e  2 NA           0    14 2001-01-23
#> 4     4 Akiko   Susu…  4   e0     80    1.27e  2  6.80e 0     0   134 2001-07-16
#> 5     5 Hutch   Lumb…  1   e0    180    1.56e  2  1.42e 1     0   110 2001-06-11
#> 6     6 Erikson E-Ca…  3   e0     NA   NA        NA           1   100 2001-05-25
#> 7     7 Erikson Maxi…  2.15e9  32740    8.99e307  1.70e38   100    19 2001-01-30
#> 8     7 Erikson Mimi… -2.15e9 -32767 -Inf        -1.70e38  -127     1 2001-01-03
#> # ℹ 1 more variable: modelStrL <chr>

Packages like expss can utilize the label information stored by readstata13 (and converted by labelled) for creating descriptive tables and plots.

library(expss)
#> Loading required package: maditr
#> 
#> To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
#> 
#> Use 'expss_output_rnotebook()' to display tables inside R Notebooks.
#>  To return to the console output, use 'expss_output_default()'.
#> 
#> Attaching package: 'expss'
#> The following object is masked from 'package:labelled':
#> 
#>     is.labelled

# Example: Use expss to create a table summarizing horse power by car brand
# First, handle missing or negative HP values
xl[xl$hp < 0 | is.na(xl$hp), "hp"] <- NA

# Create the table using expss piping syntax
xl %>%
  tab_cells(hp) %>% # Specify the variable for cells
  tab_cols(brand) %>% # Specify the variable for columns
  tab_stat_mean_sd_n() %>% # Calculate mean, standard deviation, and N
  tab_pivot() %>% # Pivot the table
  set_caption("Horse power by car brand.") # Add a caption
Horse power by car brand.
 Brand of car 
 Akiko   Erikson   Hutch   Meyer 
 Horse Power 
   Mean  62.5 32740 180 124.0
   Std. dev.  24.7 36.8
   Unw. valid N  2.0 1 1 2.0

Handling Large Datasets

As datasets grow, importing and managing them in memory can become challenging. readstata13 provides features to work efficiently with large dta files.

Partial Reading

To avoid loading an entire large dataset when only a subset is needed, readstata13 allows you to read specific rows or columns. This is particularly useful for exploring large files or extracting key variables without consuming excessive memory or time.

# Read only rows 1 to 3 of the dataset (select.rows = c(first, last) is a range)
dat_1 <- read.dta13("res/cars.dta", select.rows = c(1,3)); dat_1
#>   speed dist
#> 1     4    2
#> 2     4   10
#> 3     7    4

# Read only the 'dist' variable from the dataset
dat_2 <- read.dta13("res/cars.dta", select.cols = "dist"); head(dat_2)
#>   dist
#> 1    2
#> 2   10
#> 3    4
#> 4   22
#> 5   16
#> 6   10

A practical application of partial reading is working with large survey datasets like the SOEP (German Socio-Economic Panel).4 These datasets are often distributed across multiple files, structured like tables in a database. To link information across files, you need key identifier variables. Instead of importing entire multi-gigabyte files just to get a few ID columns, you can use select.cols to quickly and efficiently read only the necessary variables.

Compression

When saving data to a dta file, you can use the compress = TRUE option in save.dta13. This instructs the package to use the smallest possible Stata data type for each variable, potentially reducing the file size.

# Save the cars dataset with compression enabled
save.dta13(cars, file = "res/cars_compress.dta", compress = TRUE)

# Import the compressed file and check the resulting data types
dat2 <- read.dta13(file = "res/cars_compress.dta")
attr(dat2, "types")
#> [1] 65530 65529

In this example, both numeric columns in R were safely stored as integer types in the compressed dta file (type codes 65530 and 65529 instead of 65526, the double type used in the uncompressed file) because their values fit within the ranges of those integer types. The main benefit of compression is the reduction in file size. The only notable change is that after re-import, the former numeric columns have become integers.

rbind(file.info("res/cars.dta")["size"],
      file.info("res/cars_compress.dta")["size"])
#>                       size
#> res/cars.dta          1762
#> res/cars_compress.dta 1112

Advanced Features

Frames

Stata version 16 introduced the concept of data frames, allowing multiple datasets to be held in memory simultaneously and saved together in a “.dtas” file (a Stata frameset). A “.dtas” file is essentially a zip archive containing a separate dta file for each frame.

The get.frames function in readstata13 can inspect a “.dtas” file and list, for each frame it contains, the frame name (as defined within Stata), the internal filename, and the dta format version:

dtas_path <- system.file("extdata", "myproject2.dtas",
                         package="readstata13")

# Get information about frames in the .dtas file
get.frames(dtas_path)
#>       name      filename version
#> 1  persons  persons~0000     120
#> 2 counties counties~0001     118

To import data from a “.dtas” file, use read.dtas. By default, it imports all frames and returns them as a named list of R data frames.

# Read all frames from the .dtas file
read.dtas(dtas_path)
#> Warning in stata_read(filepath, missing.type, select.rows, select.cols_chr, :
#> File contains unhandled alias variable in column: 5
#> $persons
#>    personid countyid income counties median     ratio
#> 1         1        5  30818        5        0.7038001
#> 2         2        3  30752        3        0.4225046
#> 3         3        2  29673        2        0.5230381
#> 4         4        3  32115        3        0.4412310
#> 5         5        2  31189        2        0.5497603
#> 6         6        1  30992        1        0.6725256
#> 7         7        3  34328        3        0.4716356
#> 8         8        3  31508        3        0.4328914
#> 9         9        5  26071        5        0.5953915
#> 10       10        5  29768        5        0.6798210
#> 11       11        2  34757        2        0.6126525
#> 12       12        3  25630        3        0.3521330
#> 13       13        1  29146        1        0.6324675
#> 14       14        5  25752        5        0.5881063
#> 15       15        1  26806        1        0.5816895
#> 16       16        2  34368        2        0.6057957
#> 17       17        3  26914        3        0.3697740
#> 18       18        2  25886        2        0.4562857
#> 19       19        1  29321        1        0.6362650
#> 20       20        5  29571        5        0.6753220
#> 
#> $counties
#>    countyid median_income
#> 1    Brazos         46083
#> 2    Dallas         56732
#> 3    Travis         72785
#> 4    Harris         58664
#> 5    Potter         43788
#> 6   El Paso         44120
#> 7     Bowie         49153
#> 8 Galveston         69674

You can import only specific frames using the select.frames argument:

# Read only the "counties" frame
read.dtas(dtas_path, select.frames = "counties")
#> $counties
#>    countyid median_income
#> 1    Brazos         46083
#> 2    Dallas         56732
#> 3    Travis         72785
#> 4    Harris         58664
#> 5    Potter         43788
#> 6   El Paso         44120
#> 7     Bowie         49153
#> 8 Galveston         69674

Furthermore, you can apply specific read.dta13 options to individual frames within the “.dtas” file by providing a list to the read.dta13.options argument. The list structure should be list(framename = list(param = value)).

# Read frames with different column selections for each
read.dtas(dtas_path,
          read.dta13.options = list(counties = list(select.cols = "median_income"),
                                    persons = list(select.cols = "income")))
#> $persons
#>    income
#> 1   30818
#> 2   30752
#> 3   29673
#> 4   32115
#> 5   31189
#> 6   30992
#> 7   34328
#> 8   31508
#> 9   26071
#> 10  29768
#> 11  34757
#> 12  25630
#> 13  29146
#> 14  25752
#> 15  26806
#> 16  34368
#> 17  26914
#> 18  25886
#> 19  29321
#> 20  29571
#> 
#> $counties
#>   median_income
#> 1         46083
#> 2         56732
#> 3         72785
#> 4         58664
#> 5         43788
#> 6         44120
#> 7         49153
#> 8         69674

Long Strings (strL) and Binary Data

Stata 13 introduced “long strings” (strL), capable of storing very large text values. These are stored separately from the main data matrix in the dta file, with only a reference kept in the data part. readstata13 handles these; by default, they are read into R character vectors.

Interestingly, Stata also allows embedding binary data (like images, audio, or other files) within strL variables.5 While R’s standard data structures aren’t ideal for directly handling such embedded binary data within a data frame,6 readstata13 version 0.9.1 and later provides the strlexport option to extract these binary contents to files.

Using strlexport = TRUE and specifying a path with strlpath, you can save the contents of strL variables as separate files in a designated directory.

# Create a directory for exporting strLs
dir.create("res/strls/")

# Read a dta file containing strLs and export their content
dat_strl <- read.dta13("stata_strl.dta", 
                       strlexport = TRUE, 
                       strlpath = "res/strls/")

# List the files created in the export directory.
# The filenames indicate the variable and observation index (e.g., 15_1).
dir("res/strls/")
#> [1] "15_1" "16_1"

The exported files do not have extensions because the file type is not inherently known from the strL data itself (and could vary cell by cell). The user is responsible for determining the correct file type and processing the content. In this example, the first exported file (15_1) is a text file.

# Read the content of the text file strL export
readLines("res/strls/15_1")
#> [1] "R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To download R, please choose your preferred CRAN mirror."
#> [2] ""                                                                                                                                                                                                              
#> [3] "If you have questions about R like how to download and install the software, or what the license terms are, please read our answers to frequently asked questions before you send an email."                   
#> [4] ""

The second file (16_1) is a PNG image. You can read and display it using appropriate R packages like png and grid.

library(png)
library(grid) # grid is needed for grid.raster

# Read the PNG image file
img <- readPNG("res/strls/16_1")

# Display the image
grid::grid.raster(img)

Display of the R logo extracted from a long string.


  1. The dta format for current versions is well documented at https://www.stata.com/help.cgi?dta and also in the corresponding manuals.↩︎

  2. A detailed explanation can be found here: https://en.wikipedia.org/wiki/Endianness.↩︎

  3. A development branch on GitHub even includes support for the rarely seen 116 format, for which only one public sample file is known to exist.↩︎

  4. The SOEP is currently located at the DIW Berlin.↩︎

  5. A Stata blog post illustrates this feature, showing how physicians could store X-ray images alongside patient data: “In the spotlight: Storing long strings and entire files in Stata datasets”.↩︎

  6. The challenge lies in R’s vector types; standard character vectors aren’t designed for arbitrary binary data, and there’s no native vector type for image processing or other binary formats within a data frame context. This also means readstata13 currently cannot create dta files with embedded binary data from R.↩︎

readstata13/inst/doc/readstata13_basic_manual.Rmd0000644000176200001440000003566615002650213021400 0ustar liggesusers--- title: "readstata13: Basic Manual" author: "Jan Marvin Garbuszus & Sebastian Jeworutzki" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true vignette: > %\VignetteIndexEntry{readstata13: Basic Manual} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} library(readstata13) dir.create("res") options(rmarkdown.html_vignette.check_title = FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` The `readstata13` package was developed to address compatibility issues arising from changes in the Stata 13 dta file format. Prior to Stata 13, packages like `foreign` could handle dta files. However, Stata 13 introduced a new format that resembles XML.[^1] Recognizing the need for a new solution, we (Jan Marvin Garbuszus and Sebastian Jeworutzki) created `readstata13`. Leveraging Rcpp for performance, the package has evolved into a comprehensive tool for working with dta files in R. [^1]: The dta format for current versions is well documented at and also in the corresponding manuals. Key features of `readstata13` include: * **Broad Format Support:** Ability to import and export dta files across a wide range of Stata versions, including many undocumented formats. * **Handling Advanced Features:** Support for features like string encoding, multilingual labels, business calendars, long strings (`strL`), frames, and embedded binary data. * **Enhanced Functionality:** Built as a direct replacement for `foreign`'s dta functions, with added capabilities for improved label handling (including generation) and partial data reading (selecting specific rows or variables). ## Core Functionality: Reading and Writing Stata files Importing a Stata file using `readstata13` is straightforward, similar to using the `foreign` package. The primary function is `read.dta13`. 
To save an R data frame to the Stata dta format, you use the `save.dta13` function. ```{R} data (cars) # Save the 'cars' dataset to a Stata file save.dta13(cars, file = "res/cars.dta") # Read the saved Stata file back into R dat <- read.dta13("res/cars.dta") ``` Beyond the data itself, `readstata13` preserves important metadata from the Stata file. This information is stored as attributes of the imported data frame. ```{R} # prints the attributes attributes(dat) ``` Examining the attributes reveals details such as the Stata format version (e.g., format 117, introduced in Stata 13), a data label, a timestamp, and information about the data types and formats used in Stata. In this example, the `save.dta13` function wrote the numeric data from R as binary `double`s in the dta file. The byte order (endianness) is also recorded; `readstata13` is designed to handle both Little Endian (used here) and Big Endian formats during reading and writing.[^2] [^2]: A detailed explanation can be found here: <https://en.wikipedia.org/wiki/Endianness>. The package automatically manages the conversion of Stata's missing values, value labels, and variable labels during both import and export. ## Supported Stata Versions A key advantage of `readstata13` is its ability to write dta files compatible with older and newer versions of Stata. This is controlled using the `version` argument in the `save.dta13` function. 
The table below lists supported Stata versions and their corresponding file formats: | Stata Version | File Format | |---------------|-------------| | 18 - 19 | 121 | | 18 - 19 | 120 | | 15 - 19 | 119 | | 14 - 19 | 118 | | 13 | 117 | | 12 | 115 | | 10 - 11 | 114 | | 8 - 9 | 113 | | 7 | 110 | | 6 | 108 | While this table shows the most common formats, `readstata13` supports reading files from Stata version 1 (format 102) up to the latest format 121 (used for files with over 32,767 variables, readable by Stata 18 & 19 MP).[^4] The dta format has evolved over time to accommodate larger datasets and longer variable names or labels. Although `readstata13` can read virtually any format, its ability to write files that *fit* within Stata's historical limits depends on the data size. For general compatibility, it's recommended to target versions 7 or later (formats 110+), which aligns with the default in `foreign::write.dta`. [^4]: A [development branch](https://github.com/sjewo/readstata13/tree/116) on GitHub even includes support for the rarely seen `116` format, for which only one public sample file is known to exist. Here's an example of saving a file compatible with Stata 7: ```{r} # Save the cars dataset as a Stata 7 dta file save.dta13(cars, "res/cars_version.dta", version = 7) # Read the file back and check its reported version dat3 <- read.dta13("res/cars_version.dta") attr(dat3, "version") ``` ## Working with Labelled Data Stata datasets often include rich metadata like variable and value labels. Since base R data frames don't natively support this, `readstata13` stores this information in various attributes of the imported data frame, mirroring the approach used by `foreign::read.dta`. Let's use the example dataset "statacar.dta" included with the `readstata13` package. We'll initially import it without converting categorical data to R factors, keeping the original numeric codes. 
```{r} library(readstata13) x <- read.dta13(system.file("extdata/statacar.dta", package = "readstata13"), convert.factors = FALSE) ``` Variable labels are accessible via the `var.labels` attribute: ```{r} attr(x, "var.labels") ``` You can retrieve the label for a specific variable using the `varlabel()` function: ```{r} varlabel(x, var.name = "type") ``` Value labels, which map numeric codes to descriptive text, are stored in a more structured way. The `val.labels` attribute indicates which variables have associated value labels. The actual label definitions (the mapping from codes to labels) are stored as a list in the `label.table` attribute. In our example dataset, only one column has value labels: ```{r} attr(x, "val.labels") ``` The corresponding label table for the 'type' variable is named `type_en`. It's a named vector where the numeric codes are the vector values and the labels are the names: ```{r} attr(x, "label.table")$type_en ``` Convenience functions like `get.label.name()` and `get.label()` provide alternative ways to access this information: ```{r} get.label.name(x, var.name = "type") get.label(x, "type_en") ``` A common task is converting a numeric variable with value labels into an R factor. `readstata13` simplifies this with the `set.label()` function, which uses the stored label information to create the factor levels. ```{r} # Create a factor variable 'type_en' from the 'type' variable using stored labels x$type_en <- set.label(x, "type") # Display the original numeric column and the new factor column x[, c("type", "type_en")] ``` ### Multi-Language Support for Labels Stata allows datasets to include labels in multiple languages. `readstata13` supports this, and the `lang` option in `set.label()` lets you specify which language's labels to use when creating a factor. 
```{r} # Check available languages and the default language get.lang(x) # Create a factor using the German labels x$type_de <- set.label(x, "type", lang = "de") # Display the original and both language factor columns x[, c("type", "type_en", "type_de")] ``` ### Compatibility with Other Packages `readstata13` is designed to integrate well with other R packages that work with labelled data, such as `labelled` and `expss`. ```{r, eval = isTRUE(requireNamespace("labelled"))} # Requires labelled package version > 2.8.0 due to a past bug library(labelled) # Read the data and convert to the 'labelled' class format xl <- read.dta13(system.file("extdata/statacar.dta", package = "readstata13"), convert.factors = FALSE) xl <- to_labelled(xl) xl ``` Packages like `expss` can utilize the label information stored by `readstata13` (and converted by `labelled`) for creating descriptive tables and plots. ```{r, eval = isTRUE(requireNamespace("expss")) & isTRUE(requireNamespace("labelled"))} library(expss) # Example: Use expss to create a table summarizing horse power by car brand # First, handle missing or negative HP values xl[xl$hp < 0 | is.na(xl$hp), "hp"] <- NA # Create the table using expss piping syntax xl %>% tab_cells(hp) %>% # Specify the variable for cells tab_cols(brand) %>% # Specify the variable for columns tab_stat_mean_sd_n() %>% # Calculate mean, standard deviation, and N tab_pivot() %>% # Pivot the table set_caption("Horse power by car brand.") # Add a caption ``` ## Handling Large Datasets As datasets grow, importing and managing them in memory can become challenging. `readstata13` provides features to work efficiently with large dta files. ### Partial Reading To avoid loading an entire large dataset when only a subset is needed, `readstata13` allows you to read specific rows or columns. This is particularly useful for exploring large files or extracting key variables without consuming excessive memory or time. 
```{r} # Read only the first 3 rows of the dataset dat_1 <- read.dta13("res/cars.dta", select.rows = c(1,3)); dat_1 # Read only the 'dist' variable from the dataset dat_2 <- read.dta13("res/cars.dta", select.cols = "dist"); head(dat_2) ``` A practical application of partial reading is working with large survey datasets like the SOEP (German Socio-Economic Panel).[^5] These datasets are often distributed across multiple files, structured like tables in a database. To link information across files, you need key identifier variables. Instead of importing entire multi-gigabyte files just to get a few ID columns, you can use `select.cols` to quickly and efficiently read only the necessary variables. [^5]: The SOEP is currently located at the [DIW Berlin](https://www.diw.de/). ### Compression When saving data to a dta file, you can use the `compress = TRUE` option in `save.dta13`. This instructs the package to use the smallest possible Stata data type for each variable, potentially reducing the file size. ```{r} # Save the cars dataset with compression enabled save.dta13(cars, file = "res/cars_compress.dta", compress = TRUE) # Import the compressed file and check the resulting data types dat2 <- read.dta13(file = "res/cars_compress.dta") attr(dat2, "types") ``` In this example, the `numeric` vector in R was safely stored as an `integer` in the compressed dta file because its values fit within the integer range. The main benefit of compression is the reduction in file size. The only notable change is that after re-import, the former `numeric` column has become an `integer`. ```{r} rbind(file.info("res/cars.dta")["size"], file.info("res/cars_compress.dta")["size"]) ``` ## Advanced Features ### Frames Stata version 16 introduced the concept of data [frames](https://www.stata.com/help.cgi?frames), allowing multiple datasets to be held in memory simultaneously and saved together in a ".dtas" file (a Stata frameset). 
A ".dtas" file is essentially a zip archive containing a separate dta file for each frame. The `get.frames` function in `readstata13` can inspect a ".dtas" file and list the names (defined within Stata), the internal filename and version of the frames it contains: ```{r} dtas_path <- system.file("extdata", "myproject2.dtas", package="readstata13") # Get information about frames in the .dtas file get.frames(dtas_path) ``` To import data from a ".dtas" file, use `read.dtas`. By default, it imports all frames and returns them as a named list of R data frames. ```{r} # Read all frames from the .dtas file read.dtas(dtas_path) ``` You can import only specific frames using the `select.frames` argument: ```{r} # Read only the "counties" frame read.dtas(dtas_path, select.frames = "counties") ``` Furthermore, you can apply specific `read.dta13` options to individual frames within the ".dtas" file by providing a list to the `read.dta13.options` argument. The list structure should be `list(framename = list(param = value))`. ```{r} # Read frames with different column selections for each read.dtas(dtas_path, read.dta13.options = list(counties = list(select.cols = "median_income"), persons = list(select.cols = "income"))) ``` ### Long Strings (strL) and Binary Data Stata 13 introduced "long strings" (`strL`), capable of storing very large text values. These are stored separately from the main data matrix in the dta file, with only a reference kept in the data part. `readstata13` handles these; by default, they are read into R character vectors. Interestingly, Stata also allows embedding binary data (like images, audio, or other files) within `strL` variables.[^6] While R's standard data structures aren't ideal for directly handling such embedded binary data within a data frame,[^7] `readstata13` version `0.9.1` and later provides the `strlexport` option to extract these binary contents to files. 
[^6]: A Stata blog post illustrates this feature, showing how physicians could store X-ray images alongside patient data: ["In the spotlight: Storing long strings and entire files in Stata datasets"](https://www.stata.com/stata-news/news31-4/spotlight/). [^7]: The challenge lies in R's vector types; standard character vectors aren't designed for arbitrary binary data, and there's no native vector type for image processing or other binary formats within a data frame context. This also means `readstata13` currently cannot create dta files *with* embedded binary data from R. Using `strlexport = TRUE` and specifying a path with `strlpath`, you can save the contents of `strL` variables as separate files in a designated directory. ```{r} # Create a directory for exporting strLs dir.create("res/strls/") # Read a dta file containing strLs and export their content dat_strl <- read.dta13("stata_strl.dta", strlexport = TRUE, strlpath = "res/strls/") # List the files created in the export directory. # The filenames indicate the variable and observation index (e.g., 15_1). dir("res/strls/") ``` The exported files do not have extensions because the file type is not inherently known from the `strL` data itself (and could vary cell by cell). The user is responsible for determining the correct file type and processing the content. In this example, the first exported file (`15_1`) is a text file. ```{r} # Read the content of the text file strL export readLines("res/strls/15_1") ``` The second file (`16_1`) is a PNG image. You can read and display it using appropriate R packages like `png` and `grid`. 
```{r, fig.alt="Display of the R logo extracted from a long string."} library(png) library(grid) # grid is needed for grid.raster # Read the PNG image file img <- readPNG("res/strls/16_1") # Display the image grid::grid.raster(img) ``` ```{r include=FALSE} # Clean up the created directory and files unlink("res/", recursive = TRUE) ``` readstata13/inst/extdata/0000755000176200001440000000000015002626101014766 5ustar liggesusersreadstata13/inst/extdata/encodecp.dta0000644000176200001440000000121714372711643017260 0ustar liggesusersst9;9t9ߖJ 1 Sep 2016 17:16numchr%8.0g%9snumlabeltmp/sd04321.000000"cp.dta"acter vtmp/sd04321.000000"cp.dta"acter vEUROEGnumlabel;9t9 EUROEreadstata13/inst/extdata/encode.do0000644000176200001440000000037114372711643016567 0ustar liggesusersclear all set obs 6 gen int num = _n label variable num äöüß label define numlabel 1 "ä" 2 "ö" 3 "ü" 4 "ß" 5 "€" 6 "Œ" label values num numlabel // create character variable from labels decode num, gen(chr) save "encode.dta", replace readstata13/inst/extdata/statacar.do0000644000176200001440000000363714372711643017144 0ustar liggesusers clear all input int(id) str20 brand str20 model long(type) int(hp) double(max) float(mileage) byte(ecar) long(ldate) str20(ldatecal) 1 "Meyer" "Speed Start 2000" 2 150 176.5 10.2 0 1 2001-01-03 2 "Meyer" "Happy Family" 4 98 145 5.6 0 247 2001-12-31 3 "Akiko" "Susumu 1" 3 45 118.7 -1 0 14 2001-01-23 4 "Akiko" "Susumu 3" 4 80 127.3 6.8 0 134 2001-07-16 5 "Hutch" "Lumberjack 3000" 1 180 156.2 14.2 0 110 2001-06-11 6 "Erikson" "E-Car 2000" 3 . . -2 1 100 2001-05-25 7 "Erikson" "Maxinator" 2147483620 32740 8.988e+307 1.701e+38 100 19 2001-01-30 7 "Erikson" "Mimizer" -2147483647 -32767 -1.798e+308 -1.701e+38 -127 1 2001-01-03 end gen ldatecal2 = date(ldatecal, "YMD") generate strL modelStrL = model drop ldatecal rename ldatecal2 ldatecal // bcal uses a special format. 
// %tb for business calendar and following the calendar name format ldatecal %td format ldate %tbsp500 // missings replace mileage = .a if mileage ==-1 // no info replace mileage = .b if mileage ==-2 // not applicable // Label en label language en, rename label var id "Numeric ID" label var brand "Brand of car" label var type "Car classification" label var model "Car model" label var hp "Horse Power" label var max "Maximum speed" label var ldate "Launch date" label var ldatecal "Launch date (calendar)" label define type_en 1 "Off-Road" 2 "Roadster" 3 "City car" 4 "Family car" 2147483620 "max" -2147483647 "min", modify label value type type_en // Label de label language de, new label var id "Numerische ID" label var brand "Herstellermarke" label var type "Klassifikation" label var model "Automodell" label var hp "Pferdestrken" label var max "Hchstgeschwindigkeit" label var ldate "Einfhrungsdatum" label var ldatecal "Einfhrungsdatum (Kalender)" label define type_de 1 "Gelndewagen" 2 "Sportwagen" 3 "Stadtauto" 4 "Familienauto" 2147483620 "max" -2147483647 "min", modify label value type type_de label language en save "statacar.dta", replace readstata13/inst/extdata/missings_msf.dta0000644000176200001440000000256614372711643020211 0ustar liggesusers
117MSF05 Sep 2016 20:29
 I >Mjvbins%9.0g%9.0g%9.0g%2s?1
readstata13/inst/extdata/gen_fac.dta0000644000176200001440000000252414372711643017064 0ustar liggesusers
118LSF12 Jul 2016 00:22
<7CfuHTv1%9.0gv1?c`)`)K`)K`)?c?d?c`)`)?cKPO?cKPO L L?d  `)`)K"?c?cKv1K?d`)?d`)K?c`)one
readstata13/inst/extdata/nonint.do0000644000176200001440000000020414372711643016632 0ustar liggesusersclear all set obs 2 gen double v1 = _n recode v1 2 = 1.2 label define v1 1 "one" label values v1 v1 save "nonint.dta", replace readstata13/inst/extdata/myproject2.dtas0000644000176200001440000001212415002626101017741 0ustar liggesusersPKRY .frameinfoRTs S0RTp q sr Rt(p%d+ )CCC .PK) APPKRYpersons~0000.dtaZ[hUwggRtTö*fv'i.a&aRmb" dw03PAEA7蓈"TD[AjEH5桘Vx.sv6 ?9; r\ճyWWи [Aac(i9$@#3Q c&:IG4$]HVxՐk ^4''ƒrJN{3)I(V zOS=*Gk?k"k߱&Fs{}ݣys"&uG,#Δ GY^Y^oIk sQ?N)S*yDe4Qtg܀Y̕ i8pD聃0D @df;42- k0ǃ ʕ0@iݘṨ5ՎIo`. {V3ꋣ:sww $ !}F.<M8)&ܞ<['zߩbu2+~_qu8L{xK!!lx.M8x' kKq _ -)` ,: l @_$Bn'Ru2ʍ붞s 2̜ Oè uJrx\ɲq0;j떩;Y/;୵>[U{yM!]RC*1c|J=?!0003jMfJHK BRaPXr jB߹s &jq|?l@ekT& +=ax,@8Hn%8WCĥ"D8綅j*cIz*c rLl@LՅQ!ِԱx%(,Ca@ I+Iag!EƲ1Cd_p`g&)hTf;XHwDEq-;I꤯/ۆtgz%;'ct3R6' OoD%RA#G^hL=D91jˤ3s/KN{8Ě,;%"ܒ&jT%Mx! WE5&o˚PxYZLO"Z{;vŇוZKM/h'vu"&Tpf(z;ךLN_"vRhW["`}EZLMQE ;f/iB% [-:tjQZ/{ԄM\ܡ"Y r\Y+ɣUHοPKu|#PKRYpersons~0000.hdrZˋEt&q7vuLԽPt.H*10̶cAAA^˾ϳ8y}q}2 "epVWds 95 C@! Ęz䈀P(b{ 1>ԉ}>ouQn跊L/3S~45nʿH{9}r~jX3@K+\*?7ԥ7b=oߪ} p+[tz[D.:l>^t[[?_uNAD($wvn99I/O+M5mw5%hI%*Fy;C̈́hd `t+(pFW8y\f݄f0b2C`ݠ ƢtFN$eƜ4'O9x؄zs6^_71b,MiNFr1ԸL8aG~O:cSVܧ2z3EoўA+Qs(Ruthjǁ'JwEUxCȔjEߨ7 WG-Wv~Bnsm`+5TzIw}#" zw X4þn? 
}=*i4'1eEtNvB۱ź ͚V[[0236O]) ޕ?DF;TadJ)å5 7H`O|`P7$ 4Q2U( |96D-o]Fy- WedîEZBlwLXHF\K)aL:WEIMۚT 4G(O<.(9Phz@t ydEcG3F`EltJt:'oEXiM@({fw8؄ll׿EH~20UU$bqNi/6kuF^[B׮{,JP`ɸ U%i?Q`J0&seW`Q ".2dATPK ` PKRYcounties~0001.hdrW_kTGMZQBCEkwvWi_0!,w' ޹J_P~P}PڂEJ13,M^=sfΝsp-m Q%`%͞RYR2`9Qrыlr%MxS$3tAb8d*5dXhJ 5q*4g{1|Xj}5YS2إ$Xyg?&<5[ӱǟum>ح踧XƯh^.4oUJPX,Ú>h8 :JpTKeX\DI.Irف+ewƁm8/RtےB4ʨlx^_ps&m0*$(M>;^ V Zc~tra}OB6Pʎx~j) ί?/>*Y4OȧN'^: 4a἟ߙmKX_4ù@6pM 1I6W<2BAʼn#`-2ОY-h>Ѧ$id҈U`\ԇ(zE%`Z˯:cԵ]?.;~wG C.-d:䓄t݊Y"21<"tI*,[s噓j(\d pMx񒊗'eֲP<>] %xQ)O| |]9 -D+PZrrY6 Ѻbo(״,n{*}?}Gyr|_YϡPNr*R.9*}ED9Ch;]0a6t^Zն?{a3.5-(މ5 IMn,t-3Hd PK⸳ PKRY) AP .frameinfoPKRYu|#ypersons~0000.dtaPKRYJ)"3persons~0000.hdrPKRY `  counties~0001.dtaPKRY⸳ counties~0001.hdrPK2 readstata13/inst/extdata/gen_fac.do0000644000176200001440000000016614372711643016716 0ustar liggesusersclear all set obs 2 gen v1 = _n label define v1 1 "one" label values v1 v1 compress save "gen_fac.dta", replace readstata13/inst/extdata/missings.do0000644000176200001440000000057714372711643017176 0ustar liggesusersclear all set obs 27 gen missing = _n mvdecode missing, mv( 1 = . \ 2 = .a \ 3 = .b \ 4 = .c \ 5 = .d \ 6 = .e \ 7 = .f \ /// 8 = .g \ 9 = .h \ 10 = .i \ 11 = .j \ 12 = .k \ 13 = .l \ 14 = .m \ /// 15 = .n \ 16 = .o \ 17 = .p \ 18 = .q \ 19 = .r \ 20 = .s \ 21 = .t \ /// 22 = .u \ 23 = .v \ 24 = .w \ 25 = .x \ 26 = .y \ 27 = .z ) save "missings.dta", replace readstata13/inst/extdata/underscore.do0000644000176200001440000000020214372711643017474 0ustar liggesusersclear all set obs 2 gen v_1 = _n gen v_2 = _n gen long_name_multiple_underscores = _n compress save "underscore.dta", replace readstata13/inst/extdata/missings_lsf.dta0000644000176200001440000000256614372711643020210 0ustar liggesusers
117LSF05 Sep 2016 20:27
I >Mjvbins%9.0g%9.0g%9.0g%2s?1
readstata13/inst/extdata/encode.dta0000644000176200001440000000404714372711643016741 0ustar liggesusers
118LSF 1 Sep 2016 17:13
>Up 'numchr%8.0g%9snumlabeläöüßcVpcVKpcVnKpcV::pcVpcV:ՐK:KqLXL  pcVpcVՐK::KäöüßcVpcVKpcVnKpcV::pcVpcV:ՐK:KqLXL  pcVpcVՐK::Käöü߀ŒKnumlabelpcVpcVՐK:pcV äöü߀Œ
readstata13/inst/extdata/sp500.stbcal0000644000176200001440000000073614372711643017054 0ustar liggesusers* Business calendar "sp500" created by -bcal create- * Created/replaced on 18 Nov 2014 version 12.1 purpose "S&P 500 for 2001" dateformat ymd range 2001jan02 2001dec31 centerdate 2001jan02 omit dayofweek (Sa Su) omit date 2001jan15 omit date 2001feb19 omit date 2001apr13 omit date 2001may28 omit date 2001jul04 omit date 2001sep03 omit date 2001sep11 omit date 2001sep12 omit date 2001sep13 omit date 2001sep14 omit date 2001nov22 omit date 2001dec25 readstata13/inst/extdata/datetime.dta0000644000176200001440000001075014372711643017276 0ustar liggesusers
118LSF21 May 2021 14:54
6c~ 7 tdh_timetcit_timetc_hh_mmetyeartmthlytqrterly%tdgMM%tc0g%tcHH:MM%tyg%tmg%tqgNC>*p z8Np zhN0bNC>*ЪNp z8NYQpFep z[QpFeBQp z)Qp z p zp zeYQNC>*p z8Np zhN0bNC>*ЪNp z8NYQpFep z[QpFeBQp z)Qp z p zp zeYQNC>*p z8Np zhN0bNC>*ЪNp z8NYQpFep z[QpFeBQp z)Qp z p zp zeYQNC>*p z8Np zhN0bNC>*ЪNp z8NYQpFep z[QpFeBQp z)Qp z p zp zeYQNC>*p z8Np zhN0bNC>*ЪNp z8NYQpFep z[QpFeBQp z)Qp z p zp zeYQNC>*p z8Np zhN0bNC>*ЪNp z8NYQpFep z[QpFeBQp z)Qp z p zp zeYQlF|wBtwB`D@DMC_FXwBņwB`DDLC(E*bwBjwB`DDMCnCTwBwBD@DPCF%xBz,9&xBDDRC
readstata13/inst/extdata/nonint.dta0000644000176200001440000000060714372711643017007 0ustar liggesuserssc`-g12 Jul 2016 00:54v1%10.0gv1c&D0cpD[\]^?333333?v1pJ/ponereadstata13/inst/extdata/missings.dta0000644000176200001440000000242714372711643017340 0ustar liggesusers
118LSF11 Jul 2016 23:28
<7Cf missings%9.0g=  K K =?d=  =KB=KB L L?d    KA==K (08@HPX`hpx
readstata13/inst/extdata/datetime.do0000644000176200001440000000265314372711643017133 0ustar liggesusers // do file used to create stata datetimes // commands used: https://www.stata.com/manuals/ddrop.pdf . use "https://www.stata-press.com/data/r17/visits", replace . generate admit = date(admit_d, "YMD") . generate dob = date(dateofbirth, "MDY") . list admit_d admit dateofbirth dob . format admit dob %td . list admit dob . generate double admit_time = clock(admit_t, "YMDhms") . generate double disch_time = clock(discharge_t, "YMDhm") . format admit_time disch_time %tc . list admit_time disch_time . format disch_time %tcHH:MM . list discharge_t disch_time . generate double admit_Time = Clock(admit_t, "YMDhms") . format admit_Time %tC . generate admonth = month(admit) . generate adyear = year(admit) . format adyear %ty // inserted by me . list admit admonth adyear . generate monthly = ym(adyear,admonth) . format monthly %tm . list admit monthly . generate monthly2 = ym(year(admit), month(admit)) . format monthly2 %tm . generate dateoftime = dofc(admit_time) . format dateoftime %td . list admit_time dateoftime . generate monthofdate = mofd(admit) . format monthofdate %tm . list admit monthofdate . generate quarterly = qofd(dofm(monthofdate)) . format quarterly %tq . list monthofdate quarterly // trim down . keep dob adyear disch_time admit_time monthly quarterly // rename . rename (dob admit_time disch_time monthly quarterly adyear) (td tc tc_hh_mm tm tq ty) // save save "readstata13/inst/extdata/datetime.dta", replace readstata13/inst/extdata/statacar.dta0000644000176200001440000002751714372711643017315 0ustar liggesusers
118LSF  6 Sep 2016 14:04
P UC),*-C/O/ide20brandsmodeltypehpdroommaxknmileageecarhldateg_circleldatecalmodelStrL%8.0g%8.0gc%8.0g%8.0g%20sc%8.0g%8.0gc%8.0g%%20s%8.0g%6.2f%8.0g%13.0g%8.0g%10.0g%9.0g%8.0g%tbsp500%tdg%9sgakenakeodeltype_enurationivisionivisionivisionriginriginNumeric IDXKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKBrand of carXKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKCar modelarXKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKCar classificationKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKHorse PowercationKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKMaximum speedtionKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKaximum speedtionKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKaximum speedtionKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKLaunch datedtionKXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKLaunch date (calendar)KXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLKaunch date (calendar)KXnKXHLKXXHLՐKpdHL KpdqL XL  XX ՐK7HLHLK_dta⥂__YZ@@PE`_E` _غZZ@|[_lang_cen_lang_v_en ^ZX ^Zenldatecal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Einführungsdatum (Kalender)ldateal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Einführungsdatummaxeal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Höchstgeschwindigkeithpeal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Pferdestärken typeal__YZ@@PE`_E` _غZZ@|[_lang_l_de!"#$%&'()*+,-./:;<=>?@[\]^type_detypeal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Klassifikation modelal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Automodellbrandal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Herstellermarkeidndal__YZ@@PE`_E` _غZZ@|[_lang_v_de!"#$%&'()*+,-./:;<=>?@[\]^Numerische ID_dtaal__YZ@@PE`_E` _غZZ@|[_lang_listRpj0Rpj2en deMeyerordSpeed Start 2000e@f@33#AjF MeyerrdHappy Family!@b b@33@oF AkikoitSusumu 1 #yE@-̬]@XjF AkikonturySusumu 3 (;@P33333_@@mF HutchectraLumberjack 3000q=@fffffc@33cAnlF EriksonbreE-Car 2000+R.@d@lF EriksonreeMaxinator"07@Y#~dtjF EriksonleEMimizer *;@#jF GSO Speed Start 2000GSO  Happy FamilyGSO  Susumu 1GSO  Susumu 3GSO 
Lumberjack 3000GSO  E-Car 2000GSO  MaxinatorGSO Mimizerptype_deXHLX'' 84#0GeländewagenSportwagenStadtautoFamilienautomaxminftype_enXHLX'' .* &Off-RoadRoadsterCity carFamily carmaxmin
readstata13/inst/extdata/underscore.dta0000644000176200001440000000466114372711643017657 0ustar liggesusers
118LSF 1 Sep 2016 15:07
@]C f y v_1_name_multiple_underscoresv_2_name_multiple_underscoreslong_name_multiple_underscores%9.0g%9.0g%9.0go==K=nK=oPo==oՐKp?oKp?qLXL  ==ՐK?ooKo==K=nK=oPo==oՐKp?oKp?qLXL  ==ՐK?ooKo==K=nK=oPo==oՐKp?oKp?qLXL  ==ՐK?ooK
readstata13/README.md0000644000176200001440000000651215002657623013657 0ustar liggesusersreadstata13: Read and write the ‘Stata’ file format with R ================ [![CRAN status](https://www.r-pkg.org/badges/version/readstata13)](https://cran.r-project.org/package=readstata13) [![Build status](https://github.com/sjewo/readstata13/workflows/R-CMD-check/badge.svg)](https://github.com/sjewo/readstata13/actions?workflow=R-CMD-check) [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/readstata13)](https://cran.r-project.org/package=readstata13) Package to read and write all Stata file formats (version 17 and older) into a R data.frame. The dta file format versions 102 to 121 are supported (including dtas files). The function `read.dta` from the foreign package imports only dta files from Stata versions \<= 12. Due to the different structure and features of dta 117 files, we wrote a new file reader in Rcpp. Additionally the package supports many features of the Stata dta format like label sets in different languages (`?set.lang`) or business calendars (`?as.caldays`). ## Installation The package is hosted on CRAN. ``` r install.packages("readstata13") ``` ## Usage ``` r library(readstata13) dat <- read.dta13("path to file.dta") save.dta13(dat, file="newfile.dta") ``` ## Development Version To install the current release from github you need the platform specific build tools. On Windows a current installation of [Rtools](https://cran.r-project.org/bin/windows/Rtools/) is necessary, while OS X users need to install [Xcode](https://apps.apple.com/us/app/xcode/id497799835). ``` r # install.packages("remotes") remotes::install_github("sjewo/readstata13", ref="0.11") ``` To install the current development version from github: ``` r remotes::install_github("sjewo/readstata13", ref="testing") ``` ## Changelog and Features | Version | Changes | |:---|:---| | 0.11.0 | Initial support for Stata 18. Import .dtas files (Stata framesets) via `read.dtas()`. 
Alias variables are currently ignored with a warning. | | | The `select.cols` argument accepts either variable names or column indices. | | | Fix compilation on musl and other non-glibc based systems. | | | Add package alias to readstata13.Rd | See [News](NEWS) for the full changelog. ## readstata13 and foreign Most attributes of the resulting data.frame are largely similar to the data.frames produced by `foreign`. Since newer Stata files require some additional attributes, the results of `all.equal()` and `identical()` will be `FALSE` for data.frames read by `foreign::read.dta` and `read.dta13()`. Otherwise, the data.frames produced by both functions are identical. ``` r library(foreign) library(readstata13) # with factors r12 <- read.dta("http://www.stata-press.com/data/r12/auto.dta") r13 <- read.dta13("http://www.stata-press.com/data/r13/auto.dta") all.equal(r12, r13, check.attributes = FALSE) # without factors r12 <- read.dta("http://www.stata-press.com/data/r12/auto.dta", convert.factors = FALSE) r13 <- read.dta13("http://www.stata-press.com/data/r13/auto.dta", convert.factors = FALSE) all.equal(r12, r13, check.attributes = FALSE) ``` ## Authors [Marvin Garbuszus](mailto:jan.garbuszus@ruhr-uni-bochum.de) ([JanMarvin](https://github.com/JanMarvin)) and [Sebastian Jeworutzki](mailto:Sebastian.Jeworutzki@ruhr-uni-bochum.de) ([sjewo](https://github.com/sjewo)) ## Licence GPL2 readstata13/build/0000755000176200001440000000000015002660533013465 5ustar liggesusersreadstata13/build/vignette.rds0000644000176200001440000000033615002660533016026 0ustar liggesusersb```b`aab`b2 1# ')JML).I,I44OJ,LM+M MAS+J XM,NC3JrKv`@ B'k^bnj1q.y) 3GZY_Ӄ -3'foHf e2|s  =XQ` % readstata13/man/0000755000176200001440000000000015002626101013132 5ustar liggesusersreadstata13/man/set.lang.Rd0000644000176200001440000000205414372711643015154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{set.lang} \alias{set.lang} \title{Assign Stata 
Language Labels} \usage{ set.lang(dat, lang = NA, generate.factors = FALSE) } \arguments{ \item{dat}{\emph{data.frame.} Data.frame created by \code{read.dta13}.} \item{lang}{\emph{character.} Label language. Default language defined by \code{\link{get.lang}} is used if NA} \item{generate.factors}{\emph{logical.} If \code{TRUE}, missing factor levels are generated.} } \value{ Returns a data.frame with value labels in language "lang". } \description{ Changes default label language for a dataset. Variables with generated labels (option generate.labels=TRUE) are kept unchanged. } \examples{ dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13")) get.lang(dat) varlabel(dat) # set German label datDE <- set.lang(dat, "de") get.lang(datDE) varlabel(datDE) } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/get.origin.codes.Rd0000644000176200001440000000205714372711643016605 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{get.origin.codes} \alias{get.origin.codes} \title{Get Origin Code Numbers for Factors} \usage{ get.origin.codes(x, label.table) } \arguments{ \item{x}{\emph{factor.} Factor to obtain code for} \item{label.table}{\emph{table.} Table with factor levels obtained by \code{\link{get.label}}.} } \value{ Returns an integer with original codes } \description{ Recreates the code numbers of a factor as stored in the Stata dataset. } \details{ While converting numeric variables into factors, the original code numbers are lost. This function reconstructs the codes from the attribute \code{label.table}. 
} \examples{ dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13")) labname <- get.label.name(dat,"type") labtab <- get.label(dat, labname) # comparison get.origin.codes(dat$type, labtab) as.integer(dat$type) } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/stbcal.Rd0000644000176200001440000000274514372711643014720 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dbcal.R \name{stbcal} \alias{stbcal} \title{Parse Stata business calendar files} \usage{ stbcal(stbcalfile) } \arguments{ \item{stbcalfile}{\emph{stbcal-file} Stata business calendar file created by Stata.} } \value{ Returns a data.frame with two cols: \describe{ \item{range:}{The date matching the businessdate. Date format.} \item{buisdays:}{The Stata business calendar day. Integer format.} } } \description{ Create conversion table for business calendar dates. } \details{ Stata 12 introduced business calendar format. Business dates are integer numbers in a certain range of days, weeks, months or years. In this range some days are omitted (e.g. weekends or holidays). If a business calendar was created, a stbcal file matching this calendar was created. This file is required to read the business calendar. This parser reads the stbcal file and returns a data.frame with dates matching business calendar dates. A dta-file containing Stata business dates imported with read.dta13() shows in its formats attribute which stbcal file is required (e.g. "%tbsp500" requires sp500.stbcal). Stata allows adding a short description called purpose. This is added as an attribute of the resulting data.frame. 
} \examples{ sp500 <- stbcal(system.file("extdata/sp500.stbcal", package="readstata13")) } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/varlabel.Rd0000644000176200001440000000252614372711643015235 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{varlabel} \alias{varlabel} \alias{'varlabel<-'} \alias{varlabel<-} \title{Get and assign Stata Variable Labels} \usage{ varlabel(dat, var.name = NULL, lang = NA) varlabel(dat) <- value } \arguments{ \item{dat}{\emph{data.frame.} Data.frame created by \code{read.dta13}.} \item{var.name}{\emph{character vector.} Variable names. If NULL, get label for all variables.} \item{lang}{\emph{character.} Label language. Default language defined by \code{\link{get.lang}} is used if NA} \item{value}{\emph{character vector.} Character vector of size ncol(dat) with variable labels.} } \value{ Returns a named vector of variable labels } \description{ Retrieve or set variable labels for a dataset. 
} \examples{ dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"), convert.factors=FALSE) # display variable labels varlabel(dat) # display german variable labels varlabel(dat, lang="de") # display german variable label for brand varlabel(dat, var.name = "brand", lang="de") # define new variable labels varlabel(dat) <- letters[1:ncol(dat)] # display new variable labels varlabel(dat) } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/maxchar.Rd0000644000176200001440000000117314372711643015065 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{maxchar} \alias{maxchar} \title{Check max char length of data.frame vectors} \usage{ maxchar(x) } \arguments{ \item{x}{vector of data frame} } \description{ Stata requires us to provide the maximum size of a charactervector as every row is stored in a bit region of this size. } \details{ Ex: If the max chars size is four, _ is no character in this vector: 1. row: four 3. row: one_ 4. row: ____ If a character vector contains only missings or is empty, we will assign it a value of one, since Stata otherwise cannot handle what we write. } readstata13/man/as.caldays.Rd0000644000176200001440000000200714372711643015461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dbcal.R \name{as.caldays} \alias{as.caldays} \title{Convert Stata business calendar dates in readable dates.} \usage{ as.caldays(buisdays, cal, format = "\%Y-\%m-\%d") } \arguments{ \item{buisdays}{numeric Vector of business dates} \item{cal}{data.frame Conversion table for business calendar dates} \item{format}{character String with date format as in \code{\link{as.Date}}} } \value{ Returns a vector of readable dates. } \description{ Convert Stata business calendar dates in readable dates. 
} \examples{ # read business calendar and data sp500 <- stbcal(system.file("extdata/sp500.stbcal", package="readstata13")) dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13")) # convert dates and check dat$ldatescal2 <- as.caldays(dat$ldate, sp500) all(dat$ldatescal2==dat$ldatescal) } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/save.dta13.Rd0000644000176200001440000000621415002626101015275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/save.R \name{save.dta13} \alias{save.dta13} \title{Write Stata Binary Files} \usage{ save.dta13( data, file, data.label = NULL, time.stamp = TRUE, convert.factors = TRUE, convert.dates = TRUE, tz = "GMT", add.rownames = FALSE, compress = FALSE, version = 117, convert.underscore = FALSE ) } \arguments{ \item{data}{\emph{data.frame.} A data.frame Object.} \item{file}{\emph{character.} Path to the dta file you want to export.} \item{data.label}{\emph{character.} Name of the dta-file.} \item{time.stamp}{\emph{logical.} If \code{TRUE}, add a time.stamp to the dta-file.} \item{convert.factors}{\emph{logical.} If \code{TRUE}, factors will be converted to Stata variables with labels. Stata expects strings to be encoded as Windows-1252, so all levels will be recoded. Character which can not be mapped in Windows-1252 will be saved as hexcode.} \item{convert.dates}{\emph{logical.} If \code{TRUE}, dates will be converted to Stata date time format. Code from \code{foreign::write.dta}} \item{tz}{\emph{character.} time zone specification to be used for POSIXct values and dates (if convert.dates is TRUE). 
‘""’ is the current time zone, and ‘"GMT"’ is UTC (Universal Time, Coordinated).} \item{add.rownames}{\emph{logical.} If \code{TRUE}, a new variable rownames will be added to the dta-file.} \item{compress}{\emph{logical.} If \code{TRUE}, the resulting dta-file will use all of Statas numeric-vartypes.} \item{version}{\emph{numeric.} Stata format for the resulting dta-file either Stata version number (6 - 16) or the internal Stata dta-format (e.g. 117 for Stata 13). Support for large datasets: Use version="15mp" to save the dataset in the new Stata 15/16 MP file format. This feature is not thoroughly tested yet.} \item{convert.underscore}{\emph{logical.} If \code{TRUE}, all non numerics or non alphabet characters will be converted to underscores.} } \value{ The function writes a dta-file to disk. The following features of the dta file format are supported: \describe{ \item{datalabel:}{Dataset label} \item{time.stamp:}{Timestamp of file creation} \item{formats:}{Stata display formats. May be used with \code{\link[base]{sprintf}}} \item{type:}{Stata data type (see Stata Corp 2014)} \item{var.labels:}{Variable labels} \item{version:}{dta file format version} \item{strl:}{List of character vectors for the new strL string variable type. The first element is the identifier and the second element the string.} } } \description{ \code{save.dta13} writes a Stata dta-file bytewise and saves the data into a dta-file. } \examples{ \dontrun{ library(readstata13) save.dta13(cars, file="cars.dta") } } \references{ Stata Corp (2014): Description of .dta file format \url{https://www.stata.com/help.cgi?dta} } \seealso{ \code{\link[foreign]{read.dta}} in package \code{foreign} and \code{memisc} for dta files from Stata versions < 13 and \code{read_dta} in package \code{haven} for Stata version >= 13. 
} \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/saveToExport.Rd0000644000176200001440000000057714372711643016114 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{saveToExport} \alias{saveToExport} \title{Check if numeric vector can be expressed as integer vector} \usage{ saveToExport(x) } \arguments{ \item{x}{vector of data frame} } \description{ Compression can store numeric vectors as integers if the vector contains only integer-valued data. } readstata13/man/read.dtas.Rd0000644000176200001440000000234615002634740015303 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{read.dtas} \alias{read.dtas} \title{Read frames from Stata dtas files} \usage{ read.dtas(path, select.frames = NULL, read.dta13.options = NULL) } \arguments{ \item{path}{path to .dtas file} \item{select.frames}{character vector of frame names to read} \item{read.dta13.options}{list of parameters used in \code{\link[readstata13]{read.dta13}}. The list must have the following structure: \code{list(framename = list(param = value))}} } \value{ Returns a named list of data.frames. } \description{ Stata 18 introduced framesets (file extension `.dtas`) that contain zipped `dta` files. This helper function imports those files and returns a list of data.frames. 
} \examples{ path <- system.file("extdata", "myproject2.dtas", package="readstata13") # read all frames in myproject2.dtas read.dtas(path) # read selected frames read.dtas(path, select.frames = c("persons", "counties")) # read only frame counties read.dtas(path, select.frames = c("counties")) # read frames with different arguments read.dtas(path, read.dta13.options = list(counties = list(select.cols = "median_income"), persons = list(select.cols = "income"))) } readstata13/man/readstata13.Rd0000644000176200001440000000113114470612427015547 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/readstata13.R \docType{package} \name{readstata13} \alias{readstata13} \alias{readstata13-package} \title{Import Stata Data Files} \description{ Function to read the Stata file format into a data.frame. } \note{ If you catch a bug, please do not sue us, we do not have any money. } \seealso{ \code{\link[foreign]{read.dta}} and \code{memisc} for dta files from Stata Versions < 13 } \author{ Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/read.dta13.Rd0000644000176200001440000001462615002626101015260 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/read.R \name{read.dta13} \alias{read.dta13} \title{Read Stata Binary Files} \usage{ read.dta13( file, convert.factors = TRUE, generate.factors = FALSE, encoding = "UTF-8", fromEncoding = NULL, convert.underscore = FALSE, missing.type = FALSE, convert.dates = TRUE, replace.strl = TRUE, add.rownames = FALSE, nonint.factors = FALSE, select.rows = NULL, select.cols = NULL, strlexport = FALSE, strlpath = ".", tz = "GMT" ) } \arguments{ \item{file}{\emph{character.} Path to the dta file you want to import.} \item{convert.factors}{\emph{logical.} If \code{TRUE}, factors from Stata value labels are created.} \item{generate.factors}{\emph{logical.} If 
\code{TRUE} and convert.factors is TRUE, missing factor labels are created from integers. If duplicated labels are found, unique labels will be generated according the following scheme: "label_(integer code)".} \item{encoding}{\emph{character.} Strings can be converted from Windows-1252 or UTF-8 to system encoding. Options are "latin1" or "UTF-8" to specify target encoding explicitly. Since Stata 14 files are UTF-8 encoded and may contain strings which can't be displayed in the current locale. Set encoding=NULL to stop reencoding.} \item{fromEncoding}{\emph{character.} We expect strings to be encoded as "CP1252" for Stata Versions 13 and older. For dta files saved with Stata 14 or newer "UTF-8" is used. In some situation the used encoding can differ for Stata 14 files and must be manually set.} \item{convert.underscore}{\emph{logical.} If \code{TRUE}, "_" in variable names will be changed to "."} \item{missing.type}{\emph{logical.} Stata knows 27 different missing types: ., .a, .b, ..., .z. If \code{TRUE}, attribute \code{missing} will be created.} \item{convert.dates}{\emph{logical.} If \code{TRUE}, Stata dates are converted.} \item{replace.strl}{\emph{logical.} If \code{TRUE}, replace the reference to a strL string in the data.frame with the actual value. The strl attribute will be removed from the data.frame (see details).} \item{add.rownames}{\emph{logical.} If \code{TRUE}, the first column will be used as rownames. Variable will be dropped afterwards.} \item{nonint.factors}{\emph{logical.} If \code{TRUE}, factors labels will be assigned to variables of type float and double.} \item{select.rows}{\emph{integer.} Vector of one or two numbers. If single value rows from 1:val are selected. If two values of a range are selected the rows in range will be selected.} \item{select.cols}{\emph{character.} or \emph{numeric.} Vector of variables to select. 
Either variable names or position.} \item{strlexport}{\emph{logical.} Should strl content be exported as binary files?} \item{strlpath}{\emph{character.} Path for strl export.} \item{tz}{\emph{character.} time zone specification to be used for POSIXct values. ‘""’ is the current time zone, and ‘"GMT"’ is UTC (Universal Time, Coordinated).} } \value{ The function returns a data.frame with attributes. The attributes include \describe{ \item{datalabel:}{Dataset label} \item{time.stamp:}{Timestamp of file creation} \item{formats:}{Stata display formats. May be used with \code{\link{sprintf}}} \item{types:}{Stata data type (see Stata Corp 2014)} \item{val.labels:}{For each variable the name of the associated value labels in "label"} \item{var.labels:}{Variable labels} \item{version:}{dta file format version} \item{label.table:}{List of value labels.} \item{strl:}{Character vector with long strings for the new strl string variable type. The name of every element is the identifier.} \item{expansion.fields:}{list providing variable name, characteristic name and the contents of Stata characteristic field.} \item{missing:}{List of numeric vectors with Stata missing type for each variable.} \item{byteorder:}{Byteorder of the dta-file. LSF or MSF.} \item{orig.dim:}{Dimension recorded inside the dta-file.} } } \description{ \code{read.dta13} reads a Stata dta-file and imports the data into a data.frame. } \details{ If the filename is a url, the file will be downloaded as a temporary file and read afterwards. Stata files are encoded in ansinew. Depending on your system's default encoding certain characters may appear wrong. Using a correct encoding may fix these. Variable names stored in the dta-file will be used in the resulting data.frame. Stata types char, byte, and int will become integer; float and double will become numerics. R only knows a single missing type, while Stata knows 27, so all Stata missings will become NA in R. 
If you need to keep track of Statas original missing types, you may use \code{missing.type=TRUE}. Stata dates are converted to R's Date class the same way foreign handles dates. Stata 13 introduced a new character type called strL. strLs are able to store strings up to 2 billion characters. While R is able to store strings of this size in a character vector, the printed representation of such vectors looks rather cluttered, so it's possible to save only a reference in the data.frame with option \code{replace.strl=FALSE}. In R, you may use rownames to store characters (see for instance \code{data(swiss)}). In Stata, this is not possible and rownames have to be stored as a variable. If you want to use rownames, set add.rownames to TRUE. Then the first variable of the dta-file will hold the rownames of the resulting data.frame. Reading dta-files of older and newer versions than 13 was introduced with version 0.8. Stata 18 introduced alias variables and frame files. Alias variables are currently ignored when reading the file and a warning is printed. Stata frame files (file extension `.dtas`) contain zipped `dta` files which can be imported with \code{\link{read.dtas}}. } \note{ read.dta13 uses GPL 2 licensed code by Thomas Lumley and R-core members from foreign::read.dta(). } \examples{ \dontrun{ library(readstata13) r13 <- read.dta13("https://www.stata-press.com/data/r13/auto.dta") } } \references{ Stata Corp (2014): Description of .dta file format \url{https://www.stata.com/help.cgi?dta} } \seealso{ \code{\link[foreign]{read.dta}} in package \code{foreign} and \code{memisc} for dta files from Stata versions < 13 and \code{read_dta} in package \code{haven} for Stata version >= 13. 
} \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/set.label.Rd0000644000176200001440000000172614372711643015317 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{set.label} \alias{set.label} \title{Assign Stata Labels to a Variable} \usage{ set.label(dat, var.name, lang = NA) } \arguments{ \item{dat}{\emph{data.frame.} Data.frame created by \code{read.dta13}.} \item{var.name}{\emph{character.} Name of the variable in the data.frame} \item{lang}{\emph{character.} Label language. Default language defined by \code{\link{get.lang}} is used if NA} } \value{ Returns a labeled factor } \description{ Assign value labels from a Stata label set to a variable. If duplicated labels are found, unique labels will be generated according to the following scheme: "label_(integer code)". Levels without labels will become NA. } \examples{ dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"), convert.factors=FALSE) # compare vectors set.label(dat, "type") dat$type # German label set.label(dat, "type", "de") } readstata13/man/get.frames.Rd0000644000176200001440000000122215002634740015461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{get.frames} \alias{get.frames} \title{List frames in Stata dtas files} \usage{ get.frames(path) } \arguments{ \item{path}{path to .dtas file} } \value{ Returns a data.frame with frame names, internal filenames and dta file format version. } \description{ Stata 18 introduced framesets (file extension `.dtas`) that contain zipped `dta` files. This helper function lists the frames stored in such a file. 
} \examples{ path <- system.file("extdata", "myproject2.dtas", package="readstata13") # print all frames in myproject2.dtas get.frames(path) } readstata13/man/get.lang.Rd0000644000176200001440000000166314372711643015145 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{get.lang} \alias{get.lang} \title{Show Default Label Language} \usage{ get.lang(dat, print = T) } \arguments{ \item{dat}{\emph{data.frame.} Data.frame created by \code{read.dta13}.} \item{print}{\emph{logical.} If \code{TRUE}, print available languages and default language.} } \value{ Returns a list with two components: \describe{ \item{languages:}{Vector of label languages used in the dataset} \item{default:}{Name of the current default label language, otherwise NA} } } \description{ Displays information about the defined label languages. } \details{ Stata allows defining multiple label sets in different languages. This function reports the available languages and the selected default language. } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/get.label.tables.Rd0000644000176200001440000000140514372711643016546 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{get.label.tables} \alias{get.label.tables} \title{Get all Stata Label Sets for a Data.frame} \usage{ get.label.tables(dat) } \arguments{ \item{dat}{\emph{data.frame.} Data.frame created by \code{read.dta13}.} } \value{ Returns a named list of label tables } \description{ Retrieve the value labels for all variables. } \details{ This function returns the factor levels which represent a Stata label set for all variables. 
} \examples{ dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13")) get.label.tables(dat) } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/get.label.name.Rd0000644000176200001440000000171114372711643016214 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{get.label.name} \alias{get.label.name} \title{Get Names of Stata Label Set} \usage{ get.label.name(dat, var.name = NULL, lang = NA) } \arguments{ \item{dat}{\emph{data.frame.} Data.frame created by \code{read.dta13}.} \item{var.name}{\emph{character vector.} Variable names. If \code{NULL}, get names of all label sets.} \item{lang}{\emph{character.} Label language. Default language defined by \code{\link{get.lang}} is used if NA} } \value{ Returns an named vector of variable labels } \description{ Retrieves the Stata label set in the dataset for all or an vector of variable names. } \details{ Stata stores factor labels in variable independent labels sets. This function retrieves the name of the label set for a variable. } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/man/get.label.Rd0000644000176200001440000000170114372711643015274 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tools.R \name{get.label} \alias{get.label} \title{Get Stata Label Table for a Label Set} \usage{ get.label(dat, label.name) } \arguments{ \item{dat}{\emph{data.frame.} Data.frame created by \code{read.dta13}.} \item{label.name}{\emph{character.} Name of the Stata label set} } \value{ Returns a named vector of code numbers } \description{ Retrieve the value labels for a specific Stata label set. } \details{ This function returns the table of factor levels which represent a Stata label set. 
The name of a label set for a variable can be obtained by \code{\link{get.label.name}}. } \examples{ dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13")) labname <- get.label.name(dat,"type") get.label(dat, labname) } \author{ Jan Marvin Garbuszus \email{jan.garbuszus@ruhr-uni-bochum.de} Sebastian Jeworutzki \email{sebastian.jeworutzki@ruhr-uni-bochum.de} } readstata13/DESCRIPTION0000644000176200001440000000255415002667742014113 0ustar liggesusersPackage: readstata13 Type: Package Title: Import 'Stata' Data Files Version: 0.11.0 Authors@R: c( person("Jan Marvin", "Garbuszus", email = "jan.garbuszus@ruhr-uni-bochum.de", role = c("aut")), person("Sebastian", "Jeworutzki", email="Sebastian.Jeworutzki@ruhr-uni-bochum.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2671-5253")), person("R Core Team", role="cph"), person("Magnus Thor", "Torfason", role="ctb"), person("Luke M.", "Olson", role="ctb"), person("Giovanni", "Righi", role="ctb"), person("Kevin", "Jin", role="ctb") ) Description: Function to read and write the 'Stata' file format. URL: https://github.com/sjewo/readstata13 BugReports: https://github.com/sjewo/readstata13/issues License: GPL-2 | file LICENSE Imports: Rcpp (>= 0.11.5) LinkingTo: Rcpp ByteCompile: yes Suggests: testthat, knitr, rmarkdown, curl, png, expss, labelled VignetteBuilder: knitr Encoding: UTF-8 RoxygenNote: 7.3.2 NeedsCompilation: yes Packaged: 2025-04-25 10:18:03 UTC; sj Author: Jan Marvin Garbuszus [aut], Sebastian Jeworutzki [aut, cre] (), R Core Team [cph], Magnus Thor Torfason [ctb], Luke M. Olson [ctb], Giovanni Righi [ctb], Kevin Jin [ctb] Maintainer: Sebastian Jeworutzki Repository: CRAN Date/Publication: 2025-04-25 11:20:02 UTC