rio/0000755000176200001440000000000014146644522011053 5ustar liggesusersrio/NAMESPACE0000644000176200001440000000771414135741523012300 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(.export,default) S3method(.export,rio_arff) S3method(.export,rio_clipboard) S3method(.export,rio_csv) S3method(.export,rio_csv2) S3method(.export,rio_csvy) S3method(.export,rio_dbf) S3method(.export,rio_dta) S3method(.export,rio_dump) S3method(.export,rio_feather) S3method(.export,rio_fst) S3method(.export,rio_fwf) S3method(.export,rio_html) S3method(.export,rio_json) S3method(.export,rio_matlab) S3method(.export,rio_ods) S3method(.export,rio_parquet) S3method(.export,rio_psv) S3method(.export,rio_pzfx) S3method(.export,rio_r) S3method(.export,rio_rda) S3method(.export,rio_rdata) S3method(.export,rio_rds) S3method(.export,rio_sas7bdat) S3method(.export,rio_sav) S3method(.export,rio_tsv) S3method(.export,rio_txt) S3method(.export,rio_xlsx) S3method(.export,rio_xml) S3method(.export,rio_xpt) S3method(.export,rio_yml) S3method(.export,rio_zsav) S3method(.import,default) S3method(.import,rio_arff) S3method(.import,rio_clipboard) S3method(.import,rio_csv) S3method(.import,rio_csv2) S3method(.import,rio_csvy) S3method(.import,rio_dat) S3method(.import,rio_dbf) S3method(.import,rio_dif) S3method(.import,rio_dta) S3method(.import,rio_dump) S3method(.import,rio_eviews) S3method(.import,rio_feather) S3method(.import,rio_fortran) S3method(.import,rio_fst) S3method(.import,rio_fwf) S3method(.import,rio_html) S3method(.import,rio_json) S3method(.import,rio_matlab) S3method(.import,rio_mtp) S3method(.import,rio_ods) S3method(.import,rio_parquet) S3method(.import,rio_psv) S3method(.import,rio_pzfx) S3method(.import,rio_r) S3method(.import,rio_rda) S3method(.import,rio_rdata) S3method(.import,rio_rds) S3method(.import,rio_rec) S3method(.import,rio_sas7bdat) S3method(.import,rio_sav) S3method(.import,rio_spss) S3method(.import,rio_syd) S3method(.import,rio_tsv) 
S3method(.import,rio_txt) S3method(.import,rio_xls) S3method(.import,rio_xlsx) S3method(.import,rio_xml) S3method(.import,rio_xpt) S3method(.import,rio_yml) S3method(.import,rio_zsav) S3method(characterize,data.frame) S3method(characterize,default) S3method(factorize,data.frame) S3method(factorize,default) export(.export) export(.import) export(characterize) export(convert) export(export) export(export_list) export(factorize) export(gather_attrs) export(get_ext) export(import) export(import_list) export(install_formats) export(is_file_text) export(spread_attrs) importFrom(curl,curl_fetch_memory) importFrom(curl,parse_headers) importFrom(data.table,as.data.table) importFrom(data.table,fread) importFrom(data.table,fwrite) importFrom(data.table,is.data.table) importFrom(foreign,read.arff) importFrom(foreign,read.dbf) importFrom(foreign,read.dta) importFrom(foreign,read.epiinfo) importFrom(foreign,read.mtp) importFrom(foreign,read.spss) importFrom(foreign,read.systat) importFrom(foreign,read.xport) importFrom(foreign,write.arff) importFrom(foreign,write.dbf) importFrom(haven,labelled) importFrom(haven,read_dta) importFrom(haven,read_por) importFrom(haven,read_sas) importFrom(haven,read_sav) importFrom(haven,read_xpt) importFrom(haven,write_dta) importFrom(haven,write_sas) importFrom(haven,write_sav) importFrom(haven,write_xpt) importFrom(openxlsx,read.xlsx) importFrom(openxlsx,write.xlsx) importFrom(readxl,read_xls) importFrom(readxl,read_xlsx) importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(tibble,as_tibble) importFrom(tibble,is_tibble) importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) importFrom(utils,capture.output) importFrom(utils,install.packages) importFrom(utils,installed.packages) importFrom(utils,packageName) importFrom(utils,read.DIF) importFrom(utils,read.fortran) importFrom(utils,read.fwf) importFrom(utils,read.table) importFrom(utils,tar) importFrom(utils,type.convert) importFrom(utils,untar) importFrom(utils,unzip) 
importFrom(utils,write.csv) importFrom(utils,write.table) importFrom(utils,zip) rio/README.md0000644000176200001440000005135114050203722012322 0ustar liggesusers # rio: A Swiss-Army Knife for Data I/O [![CRAN Version](https://www.r-pkg.org/badges/version/rio)](https://cran.r-project.org/package=rio) ![Downloads](https://cranlogs.r-pkg.org/badges/rio) [![Travis-CI Build Status](https://travis-ci.org/leeper/rio.png?branch=master)](https://travis-ci.org/leeper/rio) [![Appveyor Build status](https://ci.appveyor.com/api/projects/status/40ua5l06jw0gjyjb?svg=true)](https://ci.appveyor.com/project/leeper/rio) [![codecov.io](https://codecov.io/github/leeper/rio/coverage.svg?branch=master)](https://codecov.io/github/leeper/rio?branch=master) ## Overview The aim of **rio** is to make data file I/O in R as easy as possible by implementing four simple functions in Swiss-army knife style: - `import()` provides a painless data import experience by automatically choosing the appropriate import/read function based on file extension (or a specified `format` argument) - `import_list()` imports a list of data frames from a multi-object file (Excel workbook, .Rdata files, zip directory, or HTML file) - `export()` provides the same painless file recognition for data export/write functionality - `convert()` wraps `import()` and `export()` to allow the user to easily convert between file formats (thus providing a FOSS replacement for programs like [Stat/Transfer](https://stattransfer.com/) or [Sledgehammer](https://www.mtna.us/#/products/sledgehammer)). Relatedly, [Luca Braglia](https://lbraglia.github.io/) has created a Shiny app called [rioweb](https://github.com/lbraglia/rioweb) that provides access to the file conversion features of rio. [GREA](https://github.com/Stan125/GREA/) is an RStudio add-in that provides an interactive interface for reading in data using rio. 
## Installation The package is available on [CRAN](https://cran.r-project.org/package=rio) and can be installed directly in R using `install.packages()`. You may want to run `install_formats()` after the first installation. ``` r install.packages("rio") install_formats() ``` The latest development version on GitHub can be installed using: ``` r if (!require("remotes")){ install.packages("remotes") } remotes::install_github("leeper/rio") ``` ## Usage Because **rio** is meant to streamline data I/O, the package is extremely easy to use. Here are some examples of reading, writing, and converting data files. ### Export Exporting data is handled with one function, `export()`: ``` r library("rio") export(mtcars, "mtcars.csv") # comma-separated values export(mtcars, "mtcars.rds") # R serialized export(mtcars, "mtcars.sav") # SPSS ``` A particularly useful feature of rio is the ability to import from and export to compressed (e.g., zip) directories, saving users the extra step of compressing a large exported file, e.g.: ``` r export(mtcars, "mtcars.tsv.zip") ``` As of rio v0.5.0, `export()` can also write multiple data frames to respective sheets of an Excel workbook or an HTML file: ``` r export(list(mtcars = mtcars, iris = iris), file = "mtcars.xlsx") ``` ### Import Importing data is handled with one function, `import()`: ``` r x <- import("mtcars.csv") y <- import("mtcars.rds") z <- import("mtcars.sav") # confirm data match all.equal(x, y, check.attributes = FALSE) ``` ## [1] TRUE ``` r all.equal(x, z, check.attributes = FALSE) ``` ## [1] TRUE Note: Because of inconsistencies across underlying packages, the data.frame returned by `import` might vary slightly (in variable classes and attributes) depending on file type. In rio v0.5.0, a new list-based import function was added. 
This allows users to import a list of data frames from a multi-object file (such as an Excel workbook, .Rdata file, zip directory, or HTML file): ``` r str(m <- import_list("mtcars.xlsx")) ``` ## List of 2 ## $ mtcars:'data.frame': 32 obs. of 11 variables: ## ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ... ## ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ... ## ..$ disp: num [1:32] 160 160 108 258 360 ... ## ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ... ## ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ... ## ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ... ## ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ... ## ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ... ## ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ... ## ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ... ## ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ... ## $ iris :'data.frame': 150 obs. of 5 variables: ## ..$ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... ## ..$ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... ## ..$ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... ## ..$ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... ## ..$ Species : chr [1:150] "setosa" "setosa" "setosa" "setosa" ... And for rio v0.6.0, a new list-based export function was added. This makes it easy to export a list of (possibly named) data frames to multiple files: ``` r export_list(m, "%s.tsv") c("mtcars.tsv", "iris.tsv") %in% dir() ``` ## [1] TRUE TRUE ### Convert The `convert()` function links `import()` and `export()` by constructing a dataframe from the imported file and immediately writing it back to disk. `convert()` invisibly returns the file name of the exported file, so that it can be used to programmatically access the new file. ``` r convert("mtcars.sav", "mtcars.dta") ``` It is also possible to use **rio** on the command-line by calling `Rscript` with the `-e` (expression) argument. 
For example, to convert a file from Stata (.dta) to comma-separated values (.csv), simply do the following: Rscript -e "rio::convert('iris.dta', 'iris.csv')" ## Supported file formats **rio** supports a wide range of file formats. To keep the package slim, all non-essential formats are supported via “Suggests” packages, which are not installed (or loaded) by default. To ensure rio is fully functional, install these packages the first time you use **rio** via: ``` r install_formats() ``` The full list of supported formats is below: | Format | Typical Extension | Import Package | Export Package | Installed by Default | | ----------------------------------------------------- | ----------------------- | --------------------------------------------------------------- | --------------------------------------------------------------- | -------------------- | | Comma-separated data | .csv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | Pipe-separated data | .psv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | Tab-separated data | .tsv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | CSVY (CSV + YAML metadata header) | .csvy | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | SAS | .sas7bdat | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SPSS | .sav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SPSS (compressed) | .zsav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | Stata | .dta | 
[**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SAS XPORT | .xpt | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SPSS Portable | .por | [**haven**](https://cran.r-project.org/package=haven) | | Yes | | Excel | .xls | [**readxl**](https://cran.r-project.org/package=readxl) | | Yes | | Excel | .xlsx | [**readxl**](https://cran.r-project.org/package=readxl) | [**openxlsx**](https://cran.r-project.org/package=openxlsx) | Yes | | R syntax | .R | **base** | **base** | Yes | | Saved R objects | .RData, .rda | **base** | **base** | Yes | | Serialized R objects | .rds | **base** | **base** | Yes | | Epiinfo | .rec | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | | Minitab | .mtp | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | | Systat | .syd | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | | “XBASE” database files | .dbf | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | | Weka Attribute-Relation File Format | .arff | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | | Data Interchange Format | .dif | **utils** | | Yes | | Fortran data | no recognized extension | **utils** | | Yes | | Fixed-width format data | .fwf | **utils** | **utils** | Yes | | gzip comma-separated data | .csv.gz | **utils** | **utils** | Yes | | Apache Arrow (Parquet) | .parquet | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | | EViews | .wf1 | [**hexView**](https://cran.r-project.org/package=hexView) | | No | | Feather R/Python interchange format | .feather | [**feather**](https://cran.r-project.org/package=feather) | [**feather**](https://cran.r-project.org/package=feather) | 
No | | Fast Storage | .fst | [**fst**](https://cran.r-project.org/package=fst) | [**fst**](https://cran.r-project.org/package=fst) | No | | JSON | .json | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | No | | Matlab | .mat | [**rmatio**](https://cran.r-project.org/package=rmatio) | [**rmatio**](https://cran.r-project.org/package=rmatio) | No | | OpenDocument Spreadsheet | .ods | [**readODS**](https://cran.r-project.org/package=readODS) | [**readODS**](https://cran.r-project.org/package=readODS) | No | | HTML Tables | .html | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | | Shallow XML documents | .xml | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | | YAML | .yml | [**yaml**](https://cran.r-project.org/package=yaml) | [**yaml**](https://cran.r-project.org/package=yaml) | No | | Clipboard | default is tsv | [**clipr**](https://cran.r-project.org/package=clipr) | [**clipr**](https://cran.r-project.org/package=clipr) | No | | [Google Sheets](https://www.google.com/sheets/about/) | as Comma-separated data | | | | | Graphpad Prism | .pzfx | [**pzfx**](https://cran.r-project.org/package=pzfx) | [**pzfx**](https://cran.r-project.org/package=pzfx) | No | Additionally, any format that is not supported by **rio** but that has a known R implementation will produce an informative error message pointing to a package and import or export function. Unrecognized formats will yield a simple “Unrecognized file format” error. ## Package Philosophy The core advantage of **rio** is that it makes assumptions that the user is probably willing to make. Eight of these are important: 1. **rio** uses the file extension of a file name to determine what kind of file it is. 
This is the same logic used by Windows OS, for example, in determining what application is associated with a given file type. By removing the need to manually match a file type (which a beginner may not recognize) to a particular import or export function, **rio** allows almost all common data formats to be read with the same function. And if a file extension is incorrect, users can force a particular import method by specifying the `format` argument. Other packages do this as well, but **rio** aims to be more complete and more consistent than each: - [**reader**](https://cran.r-project.org/package=reader) handles certain text formats and R binary files - [**io**](https://cran.r-project.org/package=io) offers a set of custom formats - [**ImportExport**](https://cran.r-project.org/package=ImportExport) focuses on select binary formats (Excel, SPSS, and Access files) and provides a Shiny interface. - [**SchemaOnRead**](https://cran.r-project.org/package=SchemaOnRead) iterates through a large number of possible import methods until one works successfully 2. **rio** uses `data.table::fread()` for text-delimited files to automatically determine the file format regardless of the extension. So, a CSV that is actually tab-separated will still be correctly imported. It’s also crazy fast. 3. **rio**, wherever possible, does not import character strings as factors. 4. **rio** supports web-based imports natively, including from SSL (HTTPS) URLs, from shortened URLs, from URLs that lack proper extensions, and from (public) Google Documents Spreadsheets. 5. **rio** imports from single-file .zip and .tar archives automatically, without the need to explicitly decompress them. Export to compressed directories is also supported. 6. **rio** wraps a variety of faster, more stream-lined I/O packages than those provided by base R or the **foreign** package. 
It uses [**data.table**](https://cran.r-project.org/package=data.table) for delimited formats, [**haven**](https://cran.r-project.org/package=haven) for SAS, Stata, and SPSS files, smarter and faster fixed-width file import and export routines, and [**readxl**](https://cran.r-project.org/package=readxl) and [**openxlsx**](https://cran.r-project.org/package=openxlsx) for reading and writing Excel workbooks. 7. **rio** stores metadata from rich file formats (SPSS, Stata, etc.) in variable-level attributes in a consistent form regardless of file type or underlying import function. These attributes are identified as: - `label`: a description of variable - `labels`: a vector mapping numeric values to character strings those values represent - `format`: a character string describing the variable storage type in the original file The `gather_attrs()` function makes it easy to move variable-level attributes to the data frame level (and `spread_attrs()` reverses that gathering process). These can be useful, especially, during file conversion to more easily modify attributes that are handled differently across file formats. As an example, the following idiom can be used to trim SPSS value labels to the 32-character maximum allowed by Stata: ``` r dat <- gather_attrs(rio::import("data.sav")) attr(dat, "labels") <- lapply(attributes(dat)$labels, function(x) { if (!is.null(x)) { names(x) <- substring(names(x), 1, 32) } x }) export(spread_attrs(dat), "data.dta") ``` In addition, two functions (added in v0.5.5) provide easy ways to create character and factor variables from these “labels” attributes. `characterize()` converts a single variable or all variables in a data frame that have “labels” attributes into character vectors based on the mapping of values to value labels. `factorize()` does the same but returns factor variables. This can be especially helpful for converting these rich file formats into open formats (e.g., `export(characterize(import("file.dta")), "file.csv")`. 
8. **rio** imports and exports files based on an internal S3 class infrastructure. This means that other packages can contain extensions to **rio** by registering S3 methods. These methods should take the form `.import.rio_X()` and `.export.rio_X()`, where `X` is the file extension of a file type. An example is provided in the [rio.db package](https://github.com/leeper/rio.db). rio/man/0000755000176200001440000000000014014451034011612 5ustar liggesusersrio/man/import.Rd0000644000176200001440000002210114135736506013425 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/import.R \name{import} \alias{import} \title{Import} \usage{ import(file, format, setclass, which, ...) } \arguments{ \item{file}{A character string naming a file, URL, or single-file .zip or .tar archive.} \item{format}{An optional character string code of file format, which can be used to override the format inferred from \code{file}. Shortcuts include: \dQuote{,} (for comma-separated values), \dQuote{;} (for semicolon-separated values), and \dQuote{|} (for pipe-separated values).} \item{setclass}{An optional character vector specifying one or more classes to set on the import. By default, the return object is always a \dQuote{data.frame}. Allowed values include \dQuote{tbl_df}, \dQuote{tbl}, or \dQuote{tibble} (if using dplyr) or \dQuote{data.table} (if using data.table). Other values are ignored, such that a data.frame is returned.} \item{which}{This argument is used to control import from multi-object files; as a rule \code{import} only ever returns a single data frame (use \code{\link{import_list}} to import multiple data frames from a multi-object file). If \code{file} is a compressed directory, \code{which} can be either a character string specifying a filename or an integer specifying which file (in locale sort order) to extract from the compressed directory. For Excel spreadsheets, this can be used to specify a sheet name or number. 
For .Rdata files, this can be an object name. For HTML files, it identifies which table to extract (from document order). Ignored otherwise. A character string value will be used as a regular expression, such that the extracted file is the first match of the regular expression against the file names in the archive.} \item{\dots}{Additional arguments passed to the underlying import functions. For example, this can control column classes for delimited file types, or control the use of haven for Stata and SPSS or readxl for Excel (.xlsx) format. See details below.} } \value{ A data frame. If \code{setclass} is used, this data frame may have additional class attribute values, such as \dQuote{tibble} or \dQuote{data.table}. } \description{ Read in a data.frame from a file. Exceptions to this rule are Rdata, RDS, and JSON input file formats, which return the originally saved object without changing its class. } \details{ This function imports a data frame or matrix from a data file with the file format based on the file extension (or the manually specified format, if \code{format} is specified). \code{import} supports the following file formats: \itemize{ \item Comma-separated data (.csv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} \item Pipe-separated data (.psv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{sep = '|'}, \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} \item Tab-separated data (.tsv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} \item SAS (.sas7bdat), using \code{\link[haven]{read_sas}}. \item SAS XPORT (.xpt), using \code{\link[haven]{read_xpt}} or, if \code{haven = FALSE}, \code{\link[foreign]{read.xport}}. 
\item SPSS (.sav), using \code{\link[haven]{read_sav}}. If \code{haven = FALSE}, \code{\link[foreign]{read.spss}} can be used. \item SPSS compressed (.zsav), using \code{\link[haven]{read_sav}}. \item Stata (.dta), using \code{\link[haven]{read_dta}}. If \code{haven = FALSE}, \code{\link[foreign]{read.dta}} can be used. \item SPSS Portable Files (.por), using \code{\link[haven]{read_por}}. \item Excel (.xls and .xlsx), using \code{\link[readxl]{read_excel}}. Use \code{which} to specify a sheet number. For .xlsx files, it is possible to set \code{readxl = FALSE}, so that \code{\link[openxlsx]{read.xlsx}} can be used instead of readxl (the default). \item R syntax object (.R), using \code{\link[base]{dget}} \item Saved R objects (.RData,.rda), using \code{\link[base]{load}} for single-object .Rdata files. Use \code{which} to specify an object name for multi-object .Rdata files. This can be any R object (not just a data frame). \item Serialized R objects (.rds), using \code{\link[base]{readRDS}}. This can be any R object (not just a data frame). \item Epiinfo (.rec), using \code{\link[foreign]{read.epiinfo}} \item Minitab (.mtp), using \code{\link[foreign]{read.mtp}} \item Systat (.syd), using \code{\link[foreign]{read.systat}} \item "XBASE" database files (.dbf), using \code{\link[foreign]{read.dbf}} \item Weka Attribute-Relation File Format (.arff), using \code{\link[foreign]{read.arff}} \item Data Interchange Format (.dif), using \code{\link[utils]{read.DIF}} \item Fortran data (no recognized extension), using \code{\link[utils]{read.fortran}} \item Fixed-width format data (.fwf), using a faster version of \code{\link[utils]{read.fwf}} that requires a \code{widths} argument and by default in rio has \code{stringsAsFactors = FALSE}. 
If \code{readr = TRUE}, import will be performed using \code{\link[readr]{read_fwf}}, where \code{widths} should be: \code{NULL}, a vector of column widths, or the output of \code{\link[readr]{fwf_empty}}, \code{\link[readr]{fwf_widths}}, or \code{\link[readr]{fwf_positions}}. \item gzip comma-separated data (.csv.gz), using \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} \item \href{https://github.com/csvy}{CSVY} (CSV with a YAML metadata header) using \code{\link[data.table]{fread}}. \item Apache Arrow Parquet (.parquet), using \code{\link[arrow]{read_parquet}} \item Feather R/Python interchange format (.feather), using \code{\link[feather]{read_feather}} \item Fast storage (.fst), using \code{\link[fst]{read.fst}} \item JSON (.json), using \code{\link[jsonlite]{fromJSON}} \item Matlab (.mat), using \code{\link[rmatio]{read.mat}} \item EViews (.wf1), using \code{\link[hexView]{readEViews}} \item OpenDocument Spreadsheet (.ods), using \code{\link[readODS]{read_ods}}. Use \code{which} to specify a sheet number. \item Single-table HTML documents (.html), using \code{\link[xml2]{read_html}}. The data structure will only be read correctly if the HTML file can be converted to a list via \code{\link[xml2]{as_list}}. \item Shallow XML documents (.xml), using \code{\link[xml2]{read_xml}}. The data structure will only be read correctly if the XML file can be converted to a list via \code{\link[xml2]{as_list}}. \item YAML (.yml), using \code{\link[yaml]{yaml.load}} \item Clipboard import (on Windows and Mac OS), using \code{\link[utils]{read.table}} with \code{row.names = FALSE} \item Google Sheets, as Comma-separated data (.csv) \item GraphPad Prism (.pzfx) using \code{\link[pzfx]{read_pzfx}} } \code{import} attempts to standardize the return value from the various import functions to the extent possible, thus providing a uniform data structure regardless of what import package or function is used. 
It achieves this by storing any optional variable-related attributes at the variable level (i.e., an attribute for \code{mtcars$mpg} is stored in \code{attributes(mtcars$mpg)} rather than \code{attributes(mtcars)}). If you would prefer these attributes to be stored at the data.frame-level (i.e., in \code{attributes(mtcars)}), see \code{\link{gather_attrs}}. After importing metadata-rich file formats (e.g., from Stata or SPSS), it may be helpful to recode labelled variables to character or factor using \code{\link{characterize}} or \code{\link{factorize}} respectively. } \note{ For csv and txt files with row names exported from \code{\link{export}}, it may be helpful to specify \code{row.names} as the column of the table which contain row names. See example below. } \examples{ # create CSV to import export(iris, csv_file <- tempfile(fileext = ".csv")) # specify `format` to override default format export(iris, tsv_file <- tempfile(fileext = ".tsv"), format = "csv") stopifnot(identical(import(csv_file), import(tsv_file, format = "csv"))) # import CSV as a `data.table` stopifnot(inherits(import(csv_file, setclass = "data.table"), "data.table")) # pass arguments to underlying import function iris1 <- import(csv_file) identical(names(iris), names(iris1)) export(iris, csv_file2 <- tempfile(fileext = ".csv"), col.names = FALSE) iris2 <- import(csv_file2) identical(names(iris), names(iris2)) # set class for the response data.frame as "tbl_df" (from dplyr) stopifnot(inherits(import(csv_file, setclass = "tbl_df"), "tbl_df")) # non-data frame formats supported for RDS, Rdata, and JSON export(list(mtcars, iris), rds_file <- tempfile(fileext = ".rds")) li <- import(rds_file) identical(names(mtcars), names(li[[1]])) # cleanup unlink(csv_file) unlink(csv_file2) unlink(tsv_file) unlink(rds_file) } \seealso{ \code{\link{import_list}}, \code{\link{.import}}, \code{\link{characterize}}, \code{\link{gather_attrs}}, \code{\link{export}}, \code{\link{convert}} } 
rio/man/rio.Rd0000644000176200001440000000323614135737451012714 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rio.R \docType{package} \name{rio} \alias{rio} \title{A Swiss-Army Knife for Data I/O} \description{ The aim of rio is to make data file input and output as easy as possible. \code{\link{export}} and \code{\link{import}} serve as a Swiss-army knife for painless data I/O for data from almost any file format by inferring the data structure from the file extension, natively reading web-based data sources, setting reasonable defaults for import and export, and relying on efficient data import and export packages. An additional convenience function, \code{\link{convert}}, provides a simple method for converting between file types. Note that some of rio's functionality is provided by \sQuote{Suggests} dependencies, meaning they are not installed by default. Use \code{\link{install_formats}} to make sure these packages are available for use. } \examples{ # export library("datasets") export(mtcars, csv_file <- tempfile(fileext = ".csv")) # comma-separated values export(mtcars, rds_file <- tempfile(fileext = ".rds")) # R serialized export(mtcars, sav_file <- tempfile(fileext = ".sav")) # SPSS # import x <- import(csv_file) y <- import(rds_file) z <- import(sav_file) # convert sav (SPSS) to dta (Stata) convert(sav_file, dta_file <- tempfile(fileext = ".dta")) # cleanup unlink(c(csv_file, rds_file, sav_file, dta_file)) } \references{ \href{https://github.com/Stan125/GREA}{GREA} provides an RStudio add-in to import data using rio. 
} \seealso{ \code{\link{import}}, \code{\link{import_list}}, \code{\link{export}}, \code{\link{export_list}}, \code{\link{convert}}, \code{\link{install_formats}} } rio/man/import_list.Rd0000644000176200001440000000550214135741523014461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/import_list.R \name{import_list} \alias{import_list} \title{Import list of data frames} \usage{ import_list( file, setclass, which, rbind = FALSE, rbind_label = "_file", rbind_fill = TRUE, ... ) } \arguments{ \item{file}{A character string containing a single file name for a multi-object file (e.g., Excel workbook, zip directory, or HTML file), or a vector of file paths for multiple files to be imported.} \item{setclass}{An optional character vector specifying one or more classes to set on the import. By default, the return object is always a \dQuote{data.frame}. Allowed values include \dQuote{tbl_df}, \dQuote{tbl}, or \dQuote{tibble} (if using dplyr) or \dQuote{data.table} (if using data.table). Other values are ignored, such that a data.frame is returned.} \item{which}{If \code{file} is a single file path, this specifies which objects should be extracted (passed to \code{\link{import}}'s \code{which} argument). Ignored otherwise.} \item{rbind}{A logical indicating whether to pass the import list of data frames through \code{\link[data.table]{rbindlist}}.} \item{rbind_label}{If \code{rbind = TRUE}, a character string specifying the name of a column to add to the data frame indicating its source file.} \item{rbind_fill}{If \code{rbind = TRUE}, a logical indicating whether to set the \code{fill = TRUE} (and fill missing columns with \code{NA}).} \item{\dots}{Additional arguments passed to \code{\link{import}}. Behavior may be unexpected if files are of different formats.} } \value{ If \code{rbind=FALSE} (the default), a list of data frames. 
Otherwise, that list is passed to \code{\link[data.table]{rbindlist}} with \code{fill = TRUE} and returns a data frame object of class set by the \code{setclass} argument; if this operation fails, the list is returned. } \description{ Use \code{\link{import}} to import a list of data frames from a vector of file names or from a multi-object file (Excel workbook, .Rdata file, zip directory, or HTML file) } \examples{ library('datasets') export(list(mtcars1 = mtcars[1:10,], mtcars2 = mtcars[11:20,], mtcars3 = mtcars[21:32,]), xlsx_file <- tempfile(fileext = ".xlsx") ) # import a single file from multi-object workbook str(import(xlsx_file, which = "mtcars1")) # import all worksheets str(import_list(xlsx_file), 1) # import and rbind all worksheets mtcars2 <- import_list(xlsx_file, rbind = TRUE) all.equal(mtcars2[,-12], mtcars, check.attributes = FALSE) # import multiple files wd <- getwd() setwd(tempdir()) export(mtcars, "mtcars1.csv") export(mtcars, "mtcars2.csv") str(import_list(dir(pattern = "csv$")), 1) unlink(c("mtcars1.csv", "mtcars2.csv")) setwd(wd) # cleanup unlink(xlsx_file) } \seealso{ \code{\link{import}}, \code{\link{export_list}}, \code{\link{export}} } rio/man/install_formats.Rd0000644000176200001440000000117513600364062015312 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/suggestions.R \name{install_formats} \alias{install_formats} \title{Install rio's \sQuote{Suggests} Dependencies} \usage{ install_formats(...) } \arguments{ \item{\dots}{Additional arguments passed to \code{\link[utils]{install.packages}}.} } \value{ \code{NULL} } \description{ This function installs various \sQuote{Suggests} dependencies for rio that expand its support to the full range of support import and export formats. These packages are not installed or loaded by default in order to create a slimmer and faster package build, install, and load. 
} rio/man/is_file_text.Rd0000644000176200001440000000167514135741404014577 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is_file_text.R \name{is_file_text} \alias{is_file_text} \title{Determine whether a file is \dQuote{plain-text} or some sort of binary format} \usage{ is_file_text(file, maxsize = Inf, text_bytes = as.raw(c(7:16, 18, 19, 32:255))) } \arguments{ \item{file}{Path to the file} \item{maxsize}{Maximum number of bytes to read} \item{text_bytes}{Which characters are used by normal text (though not necessarily just ASCII). To detect just ASCII, the following value can be used: \code{as.raw(c(7:16, 18, 19, 32:127))}} } \value{ A logical } \description{ Determine whether a file is \dQuote{plain-text} or some sort of binary format } \examples{ library(datasets) export(iris, yml_file <- tempfile(fileext = ".yml")) is_file_text(yml_file) # TRUE export(iris, sav_file <- tempfile(fileext = ".sav")) is_file_text(sav_file) # FALSE # cleanup unlink(yml_file) unlink(sav_file) } rio/man/convert.Rd0000644000176200001440000000341214135737451013577 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/convert.R \name{convert} \alias{convert} \title{Convert from one file format to another} \usage{ convert(in_file, out_file, in_opts = list(), out_opts = list()) } \arguments{ \item{in_file}{A character string naming an input file.} \item{out_file}{A character string naming an output file.} \item{in_opts}{A named list of options to be passed to \code{\link{import}}.} \item{out_opts}{A named list of options to be passed to \code{\link{export}}.} } \value{ A character string containing the name of the output file (invisibly). } \description{ This function constructs a data frame from a data file using \code{\link{import}} and uses \code{\link{export}} to write the data to disk in the format indicated by the file extension. 
} \examples{ # create a file to convert export(mtcars, dta_file <- tempfile(fileext = ".dta")) # convert Stata to CSV and open converted file convert(dta_file, csv_file <- tempfile(fileext = ".csv")) head(import(csv_file)) # correct an erroneous file format export(mtcars, csv_file2 <- tempfile(fileext = ".csv"), format = "tsv") convert(csv_file2, csv_file, in_opts = list(format = "tsv")) # convert serialized R data.frame to JSON export(mtcars, rds_file <- tempfile(fileext = ".rds")) convert(rds_file, json_file <- tempfile(fileext = ".json")) # cleanup unlink(csv_file) unlink(csv_file2) unlink(rds_file) unlink(dta_file) unlink(json_file) \dontrun{\donttest{ # convert from the command line: ## Rscript -e "rio::convert('mtcars.dta', 'mtcars.csv')" }} } \seealso{ \href{https://lbraglia.github.io/}{Luca Braglia} has created a Shiny app called \href{https://github.com/lbraglia/rioweb}{rioweb} that provides access to the file conversion features of rio through a web browser. } rio/man/extensions.Rd0000644000176200001440000000327013600364062014306 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/extensions.R \name{.import} \alias{.import} \alias{extensions} \alias{.export} \alias{.import.default} \alias{.export.default} \title{rio Extensions} \usage{ .import(file, ...) \method{.import}{default}(file, ...) .export(file, x, ...) \method{.export}{default}(file, x, ...) } \arguments{ \item{file}{A character string naming a file.} \item{\dots}{Additional arguments passed to methods.} \item{x}{A data frame or matrix to be written into a file.} } \value{ For \code{.import}, an R data.frame. For \code{.export}, \code{file}, invisibly. } \description{ Writing Import/Export Extensions for rio } \details{ rio implements format-specific S3 methods for each type of file that can be imported from or exported to. This happens via internal S3 generics, \code{.import} and \code{.export}. 
It is possible to write new methods like with any S3 generic (e.g., \code{print}). As an example, \code{.import.rio_csv} imports from a comma-separated values file. If you want to produce a method for a new filetype with extension \dQuote{myfile}, you simply have to create a function called \code{.import.rio_myfile} that implements a format-specific importing routine and returns a data.frame. rio will automatically recognize new S3 methods, so that you can then import your file using: \code{import("file.myfile")}. As general guidance, if an import method creates many attributes, these attributes should be stored --- to the extent possible --- in variable-level attributes fields. These can be \dQuote{gathered} to the data.frame level by the user via \code{\link{gather_attrs}}. } \seealso{ \code{\link{import}}, \code{\link{export}} } rio/man/gather_attrs.Rd0000644000176200001440000000250614063055402014576 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/gather_attrs.R \name{gather_attrs} \alias{gather_attrs} \alias{spread_attrs} \title{Gather attributes from data frame variables} \usage{ gather_attrs(x) spread_attrs(x) } \arguments{ \item{x}{A data frame.} } \value{ \code{x}, with variable-level attributes stored at the data frame level. } \description{ \code{gather_attrs} moves variable-level attributes to the data frame level and \code{spread_attrs} reverses that operation. } \details{ \code{\link{import}} attempts to standardize the return value from the various import functions to the extent possible, thus providing a uniform data structure regardless of what import package or function is used. It achieves this by storing any optional variable-related attributes at the variable level (i.e., an attribute for \code{mtcars$mpg} is stored in \code{attributes(mtcars$mpg)} rather than \code{attributes(mtcars)}). \code{gather_attrs} moves these to the data frame level (i.e., in \code{attributes(mtcars)}). 
\code{spread_attrs} moves attributes back to the variable level. } \examples{ e <- try(import("http://www.stata-press.com/data/r13/auto.dta")) if (!inherits(e, "try-error")) { str(e) g <- gather_attrs(e) str(attributes(e)) str(g) } } \seealso{ \code{\link{import}}, \code{\link{characterize}} } rio/man/characterize.Rd0000644000176200001440000000341714135737451014570 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/characterize.R \name{characterize} \alias{characterize} \alias{factorize} \alias{characterize.default} \alias{characterize.data.frame} \alias{factorize.default} \alias{factorize.data.frame} \title{Character conversion of labelled data} \usage{ characterize(x, ...) factorize(x, ...) \method{characterize}{default}(x, ...) \method{characterize}{data.frame}(x, ...) \method{factorize}{default}(x, coerce_character = FALSE, ...) \method{factorize}{data.frame}(x, ...) } \arguments{ \item{x}{A vector or data frame.} \item{\dots}{additional arguments passed to methods} \item{coerce_character}{A logical indicating whether to additionally coerce character columns to factor (in \code{factorize}). Default \code{FALSE}.} } \description{ Convert labelled variables to character or factor } \details{ \code{characterize} converts a vector with a \code{labels} attribute of named levels into a character vector. \code{factorize} does the same but to factors. This can be useful at two stages of a data workflow: (1) importing labelled data from metadata-rich file formats (e.g., Stata or SPSS), and (2) exporting such data to plain text files (e.g., CSV) in a way that preserves information. 
} \examples{ # vector method x <- structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3)) characterize(x) factorize(x) # data frame method x <- data.frame(v1 = structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3)), v2 = structure(c(1,0,0,1), labels = c("foo" = 0, "bar" = 1))) str(factorize(x)) str(characterize(x)) # comparison of exported file contents import(export(x, csv_file <- tempfile(fileext = ".csv"))) import(export(factorize(x), csv_file)) # cleanup unlink(csv_file) } \seealso{ \code{\link{gather_attrs}} } rio/man/figures/0000755000176200001440000000000013334042064013261 5ustar liggesusersrio/man/figures/logo.png0000644000176200001440000002677013334042161014741 0ustar liggesusersPNG  IHDRU)sBIT|d pHYs N N+aotEXtSoftwarewww.inkscape.org< IDATxw|d+tP% AP"]zW(\.tQ&" XE!HBI;?Ͷ@3gߝ33g]h l-lK @@!RWZVCtkX6_2G6~eiWSV`gN5z6 !e3;O8z]ћ|nGw?IR `/2pR[#5ºG P*Oϧ^8ؗyqn\Qvz<K@mZjk1)Rq +C.CV7w|'5y8=^2YV G@{[g' >ݻ  t'/)t*\2bxUL ӵ[gqr9 -bye  [SKۅTkH<_~Qee+Ckߖƍ[RgQOIU 8d",jAuVa|)Rz;ԕ! GL54yy޳oR [ֲʱvaB5I`樹{;M;H;{x Ҷ 6ţCabi,Jj9"7lyI)!"kϖ,MXX7 GͭrR'6$.wE;=šLܼ԰A\F[÷RBߗظ] ~75{KeIaA6jn%~mVj $l J[GZB~J*ۊr r-04-Vc`A[ ả3Ж0Va|:RjךQi>ZCCNѿ`M-RQqC# Mm s P=H7eV+C5GUE5zɬ!2Yj)U5GWǶlu_*3ȉ# 3;OKeZJ~Fs蜱2 5GH$/! 
pǥq}B03&Q}_d X}KXF͛+=e5j5Q"ITe< W-ܚRw@loɎEĆ/ɼz0RWU¨(a[8/6UDrU ;4b 6N1uإ)HPѦFJ 0jR5%dP}bI9?=#G`N߯' ;wfͩsr?v ^;FTtӿg I}\hU)Y0!V eSp ?F͍)BlQz?x KH"8Ig7ۧ>5 6n g %i^e1˳1V5S=vЛ]`9AgGd8}q_Yeż/P:j>jnbMRX89aWۓAlXWQ8GxwLkט4s:e>?kP2eQY̯h/Y(,0 !#em&7}k*Y+уq{^Jg zdW0RӾ:o.ھA#lee1!"es;OMQ ̚1 uuNmåa UgalX"T*~1~qľH;{MexwlA(\mfabc/!X׿{y I:{f&F}j[JJ gpu7hTT 1F`p-k619ϼrF+B,WAcB9MN."Ճ%ߦYf ˼Ĺ9ڳ+ IscFhW3:rnEcD թQrо9yd#XjiȬp>@Ν*OȍE&M8v}-_m \ڴЫb c ̅d\*3$"6lIDQz(Ŷ9::ԩS9{46lQ~9!F#}rbt_~l3+GSP@ĺ-Pv* `]mOVFGmߟz#F`cmpm&N/3P>JV[RN&|',:beGFs~%'67?i~A %LJ۳k.Ο?ѣlll!Uxbw]B #76N :;hW|…Y wmZbSGtAAPPݻw=z]/S.>KJr o,%j*3\Y 3uCz!҈ 1j(=O?d1Џ^%':[[vy3 U~~~L4Ǜ4Bn*4 ¥KdƛS5bD\BAj:\ @ȈcV`bܼ쨘rM`` gϦ&'??M[j5+,۳Vx5#ƈu?iI#k6O(=+DZ?MfN6Evd4L |r6mjOa\222lb+6vJ>]'ڶ~$rJRp' kFTلNuh2cSp| `}#u!!!˗?Q˫_AƍjC+|` %wH>qoєq*˒*rNE]`BLh]߸qy׉4l؈?7E#'W}3lCa*=aEq]ZNN9 sڷoCU9KKqBHI~J* siק&Anoa[ <6h捶X6|{=C)H3a @BCCYbfXh4ٽƫ/ė^2hkRONp $oT*{̛7JsKdž-T;{C 2Х bH.]Jӝꊷ7cǎeʔ)81qڵlJ;;>#:uh~ucYɈ`[P?GOOO|||2e ƍx/{7~iHC ZaИ!_B~WN6lŋԩQڴ.]bp{ ðRi3&HGld'RNj׮˫8zE!Q~/?dQˀeM=gyIh jCPn:n߾ͨQfӦM4j֚C3w""\w%/EC XBų5{:עBK/qEyz3AѰoy}{&"]:v3]n߾'|bCuPIRa 66xw?%h | .Ovtڕ_|tV֙<~mg8a<ΟgY88x5O6mXl,ց{wq)XNA.Ņu֑ĸqhٲ%#Gd֬Y[`snRR+֬f_Ea@~eƠN>ʹi8s zȮкBF,>,>ݻhIC*!Z^^^ݻHFŎ;9s&G6Rdd$sǑ#Gpqscڏy} ׯnܽ1rHFwtg){*Ț QwX?F%88#Gc>CZll=Ο?OCڳ;7"#o8JEy"mDαWvV`y+q~%ao1h|-8y$cҤIvZZli[9œI˖-ԭ[W6.fK'9 &"= g;58sqy6o̠Aߨ]KHHmI)y'))O=Ů/RaάEXRꔍU|Pj"vE%*lD3f W^]ygHMMނ>۰Ǟlǔxvqk,*a)"^UЛ}֎BSx}Ife^ҪÓ,Z_©0V,^bfY`͜={K:^YwFZdc ;q<֦ YZ[XD&Ϙ]:/p9T *%d&L>Kǎ-k90F\\\DEE1rX>z6¥K( yۤ \)%j$RSS5jw6 }ꄵ$U8˯L:-7x&\xkT*'O$%%Z,_wXλլVکEɩڜ]fR{H=[ZP(LTV4hׯ_z)% s3mFXz 0Ν˹sxJMhQfB#0J^^;w$??_{|}}+C\at#ַ~ˍ7t^T2l0S!ۿ kmۦhդI^}USgQWuۛKbggg2{ 1xĪqaK׶y޽lRz^#,]0աCJCB;c2;ǐ wX2#oA:WtV*?d6~RKL-,???vMHH޸ !>ޚ9>#@ڶm[n޼ɯҥKqs+YPRQP`ؾ˪B!759(Z*ss^|"y IEזw(dd\GO<[0x3ScaҴiSj5yyl l -)PQc[B& G!P Vh 5KV:'4vvb~?y?0+GG2stY6::NH}G,GreK ;;)zMZvKuh4oy%,A83` *)2l%Q&|uFߢC^RJB@QlWӯ%T6c; GG2ٳfCJDQɻn6֥XUaVrdSmwQ< #]{TX%,BH*K:nש;b`ч +•" HC{4to[WJ/6M իWt<5 ᆪDHm] 2Oqa:9ޑ.%-1G,BP`kk~-]t~%"JƱ'?5԰s~nҿžNŏU88ܠ^]CճdFGJMM-*^R'##n޼I^L^Iv1|0z7Vo$;R[BTޢYpCyT":o5*u 
iIFZ3f@A^^^.811+Vq=@UݕCuYSP, ~m֬YCZZդ5 G1-XUMN.Y#t^]*V*K\P`hMU\sW*B̑EmaUz.ɺ*+_tJENlSūS Z s<1yBk]UEq,@o^KH6XnQ( .zOTdߌ*uCsXVʕ+:_T*:T)HUcIc$ Y 2H8XwoTKʟHّg8ֵcI1bm߾] 0`Zdʅܚb䤝KɝNSwx!9 R$jvB:/^DՅ+Z6۷uΎ) USg]'dpnE]$*qnbm#͛7͛3gS>?&7yYWר|z<]*H oޫp/tw- `[48A7̄6%t#F ~ܹ%\n@({5jy |pчs|v<|zC Rc:a5lPZg'NΈ#Je. 38p {.uE8V `)aedd0a>\N_3j(VZUZooo VyKVz7RvÄvмºsO?4;w,w}WXO7b,QŜ KR1dNǏZ L0 lA֌XVHzz:{pNE-Jk~3̝;aÆ%w^TsϟOxxx4iѕfooO`` 'NdĉMd7`>Wx̙ ߷eРA^뭷J,*cƌa֬Y:OrcOhAN:ŤIJRqۗ񞜜̑#GJOB??9 ]RSS>|8ǎӹ!mV&MHh"7l#Jx{*"cYs@uNڨ;! F=ɳ:kvݻy׉4kyɊp"L}x7G'lK¾_()eYVQuR=%JNOPwԠ;:*UFFcƌÕƢ IKKCVx\1sgbo<nY#eF€W?+10 ́}IGs4q?bvՅ#Gкukݫx駍^ 9++̣!\j!zUɺŷrcFTYi@[*T.B݁ܘ;0c̼I:_^ڵken6cǎѹsg4 _Og;'Ʊn2ܴjSltQ]}܃Q1/]j |qtڵkGxxxr 8u:t3=tϿҨ2Uv=nEɳ}>uA ) 36ꊯ/s-G9dgg`2>۞]^~ZCCNs R\2 ʔz-<'(Y^a̙39rΣFam,^tl#;U"7l9ѱ!bfc׀^@MAG1'K&ܛ+KQZ^u&NWj߁̘.MWCSRMSkr&{A1PRs]YޣUsB&vve+ҁ/_9. p3g='R&~oDNjno/RNnՐG-԰{;+IIIL=W/2^׻O? dnl@nHOϣu ud£FiXGx?2V’RuvU渵}L6rcܴs@t11(,%2B,@J'V 7L{OC6+/.3\P=ޚ'cGy},2b\Cw?{;b࿀kV!B`B~} ~݋:*Y>9/TBիWycL"d=`{,9Rإ)HPѦӿ1a*1K+R!DFMi9o-̃"B:#jlf!^O^=2r"IX:ck2ȸtȍdvT@H9 #[MXMz!EJWGsۻdvcă,BUz˺=VF+Dn&so yr{e0 wѺ@Jo4;Do+,bfI 4 P=kPr]yHF@#phT6a j$͕pGmj$"nnahK8Bӭic1PՅ%y!rr eW>#XM1PxuzܚaܰM%zw>l$qRr mD8ֽsoiig/hK9XwP},Z(O./F\f5Ԡ$BH2P/`~ PR`IENDB`rio/man/figures/logo.svg0000644000176200001440000001454013334042226014746 0ustar liggesusers image/svg+xml RIO rio/man/arg_reconcile.Rd0000644000176200001440000000564714014451034014711 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/arg_reconcile.R \name{arg_reconcile} \alias{arg_reconcile} \title{Reconcile an argument list to any function signature.} \usage{ arg_reconcile( fun, ..., .args = alist(), .docall = FALSE, .include = c(), .exclude = c(), .remap = list(), .warn = TRUE, .error = "default", .finish = identity ) } \arguments{ \item{fun}{A function to which an argument list needs to be adapted. Use the unquoted name of the function. If it's in a different package then the fully qualified unquoted name (e.g. 
\code{utils::read.table})} \item{...}{An arbitrary list of named arguments (unnamed ones will be ignored). Arguments in \code{.args} are overridden by arguments of the same name (if any) in \code{...}} \item{.args}{A list or \code{alist} of named arguments, to be merged with \code{...}. Arguments in \code{.args} are overridden by arguments of the same name (if any) in \code{...}} \item{.docall}{If set to \code{TRUE} will not only clean up the arguments but also execute \code{fun} with those arguments (\code{FALSE} by default) and return the results} \item{.include}{Whitelist. If not empty, only arguments named here will be permitted, and only if they satisfy the conditions implied by the other arguments. Evaluated before \code{.remap}.} \item{.exclude}{Blacklist. If not empty, arguments named here will be removed even if they satisfy the conditions implied by the other arguments. Evaluated before \code{.remap}.} \item{.remap}{An optional named character vector or named list of character values for standardizing arguments that play the same role but have different names in different functions. Evaluated after \code{.exclude} and \code{.include}.} \item{.warn}{Whether to issue a warning message (default) when invalid arguments need to be discarded.} \item{.error}{If specified, should be the object to return in the event of error. This object will have the error as its \code{error} attribute. If not specified an ordinary error is thrown with an added hint on the documentation to read for troubleshooting. Ignored if \code{.docall} is \code{FALSE}. The point of doing this is fault-tolerance-- if this function is part of a lengthy process where you want to document an error but keep going, you can set \code{.error} to some object of a compatible type. That object will be returned in the event of error and will have as its \code{"error"} attribute the error object.} \item{.finish}{A function to run on the result before returning it. 
Ignored if \code{.docall} is \code{FALSE}.} } \value{ Either a named list or the result of calling \code{fun} with the supplied arguments } \description{ Adapt an argument list to a function excluding arguments that will not be recognized by it, redundant arguments, and un-named arguments. } rio/man/get_ext.Rd0000644000176200001440000000072213700163775013556 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{get_ext} \alias{get_ext} \title{Get File Type from Extension} \usage{ get_ext(file) } \arguments{ \item{file}{A character string containing a filename, file path, or URL.} } \value{ A characters string containing a file type recognized by rio. } \description{ A utility function to retrieve the file type from a file extension (via its filename/path/URL) } rio/man/export_list.Rd0000644000176200001440000000335014135740455014472 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/export_list.R \name{export_list} \alias{export_list} \title{Export list of data frames to files} \usage{ export_list(x, file, ...) } \arguments{ \item{x}{A list of data frames to be written to files.} \item{file}{A character vector string containing a single file name with a \code{\%s} wildcard placeholder, or a vector of file paths for multiple files to be imported. If \code{x} elements are named, these will be used in place of \code{\%s}, otherwise numbers will be used; all elements must be named for names to be used.} \item{\dots}{Additional arguments passed to \code{\link{export}}.} } \value{ The name(s) of the output file(s) as a character vector (invisibly). } \description{ Use \code{\link{export}} to export a list of data frames to a vector of file names or a filename pattern. } \details{ \code{\link{export}} can export a list of data frames to a single multi-dataset file (e.g., an Rdata or Excel .xlsx file). Use \code{export_list} to export such a list to \emph{multiple} files. 
} \examples{ library('datasets') export(list(mtcars1 = mtcars[1:10,], mtcars2 = mtcars[11:20,], mtcars3 = mtcars[21:32,]), xlsx_file <- tempfile(fileext = ".xlsx") ) # import all worksheets mylist <- import_list(xlsx_file) # re-export as separate named files csv_files1 <- sapply(1:3, function(x) tempfile(fileext = paste0("-", x, ".csv"))) export_list(mylist, file = csv_files1) # re-export as separate files using a name pattern export_list(mylist, file = csv_files2 <- tempfile(fileext = "\%s.csv")) # cleanup unlink(xlsx_file) unlink(csv_files1) unlink(csv_files2) } \seealso{ \code{\link{import}}, \code{\link{import_list}}, \code{\link{export}} } rio/man/export.Rd0000644000176200001440000001774214135740277013453 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/export.R \name{export} \alias{export} \title{Export} \usage{ export(x, file, format, ...) } \arguments{ \item{x}{A data frame or matrix to be written into a file. Exceptions to this rule are that \code{x} can be a list of data frames if the output file format is an Excel .xlsx workbook, .Rdata file, or HTML file, or a variety of R objects if the output file format is RDS or JSON. See examples.) To export a list of data frames to multiple files, use \code{\link{export_list}} instead.} \item{file}{A character string naming a file. Must specify \code{file} and/or \code{format}.} \item{format}{An optional character string containing the file format, which can be used to override the format inferred from \code{file} or, in lieu of specifying \code{file}, a file with the symbol name of \code{x} and the specified file extension will be created. Must specify \code{file} and/or \code{format}. Shortcuts include: \dQuote{,} (for comma-separated values), \dQuote{;} (for semicolon-separated values), \dQuote{|} (for pipe-separated values), and \dQuote{dump} for \code{\link[base]{dump}}.} \item{\dots}{Additional arguments for the underlying export functions. 
This can be used to specify non-standard arguments. See examples.} } \value{ The name of the output file as a character string (invisibly). } \description{ Write data.frame to a file } \details{ This function exports a data frame or matrix into a file with file format based on the file extension (or the manually specified format, if \code{format} is specified). The output file can be to a compressed directory, simply by adding an appropriate additional extensiont to the \code{file} argument, such as: \dQuote{mtcars.csv.tar}, \dQuote{mtcars.csv.zip}, or \dQuote{mtcars.csv.gz}. \code{export} supports many file formats. See the documentation for the underlying export functions for optional arguments that can be passed via \code{...} \itemize{ \item Comma-separated data (.csv), using \code{\link[data.table]{fwrite}} or, if \code{fwrite = TRUE}, \code{\link[utils]{write.table}} with \code{row.names = FALSE}. \item Pipe-separated data (.psv), using \code{\link[data.table]{fwrite}} or, if \code{fwrite = TRUE}, \code{\link[utils]{write.table}} with \code{sep = '|'} and \code{row.names = FALSE}. \item Tab-separated data (.tsv), using \code{\link[data.table]{fwrite}} or, if \code{fwrite = TRUE}, \code{\link[utils]{write.table}} with \code{row.names = FALSE}. \item SAS (.sas7bdat), using \code{\link[haven]{write_sas}}. \item SAS XPORT (.xpt), using \code{\link[haven]{write_xpt}}. \item SPSS (.sav), using \code{\link[haven]{write_sav}} \item SPSS compressed (.zsav), using \code{\link[haven]{write_sav}} \item Stata (.dta), using \code{\link[haven]{write_dta}}. Note that variable/column names containing dots (.) are not allowed and will produce an error. \item Excel (.xlsx), using \code{\link[openxlsx]{write.xlsx}}. Existing workbooks are overwritten unless \code{which} is specified, in which case only the specified sheet (if it exists) is overwritten. If the file exists but the \code{which} sheet does not, data are added as a new sheet to the existing workbook. 
\code{x} can also be a list of data frames; the list entry names are used as sheet names. \item R syntax object (.R), using \code{\link[base]{dput}} (by default) or \code{\link[base]{dump}} (if \code{format = 'dump'}) \item Saved R objects (.RData,.rda), using \code{\link[base]{save}}. In this case, \code{x} can be a data frame, a named list of objects, an R environment, or a character vector containing the names of objects if a corresponding \code{envir} argument is specified. \item Serialized R objects (.rds), using \code{\link[base]{saveRDS}}. In this case, \code{x} can be any serializable R object. \item "XBASE" database files (.dbf), using \code{\link[foreign]{write.dbf}} \item Weka Attribute-Relation File Format (.arff), using \code{\link[foreign]{write.arff}} \item Fixed-width format data (.fwf), using \code{\link[utils]{write.table}} with \code{row.names = FALSE}, \code{quote = FALSE}, and \code{col.names = FALSE} \item gzip comma-separated data (.csv.gz), using \code{\link[utils]{write.table}} with \code{row.names = FALSE} \item \href{https://github.com/csvy}{CSVY} (CSV with a YAML metadata header) using \code{\link[data.table]{fwrite}}. \item Apache Arrow Parquet (.parquet), using \code{\link[arrow]{write_parquet}} \item Feather R/Python interchange format (.feather), using \code{\link[feather]{write_feather}} \item Fast storage (.fst), using \code{\link[fst]{write.fst}} \item JSON (.json), using \code{\link[jsonlite]{toJSON}}. In this case, \code{x} can be a variety of R objects, based on class mapping conventions in this paper: \href{https://arxiv.org/abs/1403.2805}{https://arxiv.org/abs/1403.2805}. \item Matlab (.mat), using \code{\link[rmatio]{write.mat}} \item OpenDocument Spreadsheet (.ods), using \code{\link[readODS]{write_ods}}. (Currently only single-sheet exports are supported.) \item HTML (.html), using a custom method based on \code{\link[xml2]{xml_add_child}} to create a simple HTML table and \code{\link[xml2]{write_xml}} to write to disk. 
\item XML (.xml), using a custom method based on \code{\link[xml2]{xml_add_child}} to create a simple XML tree and \code{\link[xml2]{write_xml}} to write to disk. \item YAML (.yml), using \code{\link[yaml]{as.yaml}} \item Clipboard export (on Windows and Mac OS), using \code{\link[utils]{write.table}} with \code{row.names = FALSE} } When exporting a data set that contains label attributes (e.g., if imported from an SPSS or Stata file) to a plain text file, \code{\link{characterize}} can be a useful pre-processing step that records value labels into the resulting file (e.g., \code{export(characterize(x), "file.csv")}) rather than the numeric values. Use \code{\link{export_list}} to export a list of dataframes to separate files. } \examples{ library("datasets") # specify only `file` argument export(mtcars, f1 <- tempfile(fileext = ".csv")) \dontrun{ wd <- getwd() setwd(tempdir()) # Stata does not recognize variables names with '.' export(mtcars, f2 <- tempfile(fileext = ".dta")) # specify only `format` argument f2 \%in\% tempdir() export(mtcars, format = "stata") "mtcars.dta" \%in\% dir() setwd(wd) } # specify `file` and `format` to override default format export(mtcars, file = f3 <- tempfile(fileext = ".txt"), format = "csv") # export multiple objects to Rdata export(list(mtcars = mtcars, iris = iris), f4 <- tempfile(fileext = ".rdata")) export(c("mtcars", "iris"), f4) # export to non-data frame R object to RDS or JSON export(mtcars$cyl, f5 <- tempfile(fileext = ".rds")) export(list(iris, mtcars), f6 <- tempfile(fileext = ".json")) # pass arguments to underlying export function export(mtcars, f7 <- tempfile(fileext = ".csv"), col.names = FALSE) # write data to .R syntax file and append additional data export(mtcars, file = f8 <- tempfile(fileext = ".R"), format = "dump") export(mtcars, file = f8, format = "dump", append = TRUE) source(f8, echo = TRUE) # write to an Excel workbook \dontrun{ ## export a single data frame export(mtcars, f9 <- tempfile(fileext = 
".xlsx")) ## export NAs to Excel as missing via args passed to `...` mtcars$drat <- NA_real_ mtcars \%>\% export(f10 <- tempfile(fileext = ".xlsx"), keepNA = TRUE) ## export a list of data frames as worksheets export(list(a = mtcars, b = iris), f11 <- tempfile(fileext = ".xlsx")) ## export, adding a new sheet to an existing workbook export(iris, f12 <- tempfile(fileext = ".xlsx"), which = "iris") } # write data to a zip-compressed CSV export(mtcars, f13 <- tempfile(fileext = ".csv.zip")) # cleanup unlink(f1) # unlink(f2) unlink(f3) unlink(f4) unlink(f5) unlink(f6) unlink(f7) unlink(f8) # unlink(f9) # unlink(f10) # unlink(f11) # unlink(f12) # unlink(f13) } \seealso{ \code{\link{.export}}, \code{\link{characterize}}, \code{\link{import}}, \code{\link{convert}}, \code{\link{export_list}} } rio/DESCRIPTION0000644000176200001440000000657314146644522012574 0ustar liggesusersPackage: rio Type: Package Title: A Swiss-Army Knife for Data I/O Version: 0.5.29 Date: 2021-11-08 Authors@R: c(person("Jason", "Becker", role = "ctb", email = "jason@jbecker.co"), person("Chung-hong", "Chan", role = "aut", email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), person("Geoffrey CH", "Chan", role = "ctb", email = "gefchchan@gmail.com"), person("Thomas J.", "Leeper", role = c("aut", "cre"), email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326")), person("Christopher", "Gandrud", role = "ctb"), person("Andrew", "MacDonald", role = "ctb"), person("Ista", "Zahn", role = "ctb"), person("Stanislaus", "Stadlmann", role = "ctb"), person("Ruaridh", "Williamson", role = "ctb", email = "ruaridh.williamson@gmail.com"), person("Patrick", "Kennedy", role = "ctb"), person("Ryan", "Price", email = "ryapric@gmail.com", role = "ctb"), person("Trevor L", "Davis", email = "trevor.l.davis@gmail.com", role = "ctb"), person("Nathan", "Day", email = "nathancday@gmail.com", role = "ctb"), person("Bill", "Denney", email="wdenney@humanpredictions.com", role="ctb", 
comment=c(ORCID="0000-0002-5759-428X")), person("Alex", "Bokov", email = "alex.bokov@gmail.com", role = "ctb", comment=c(ORCID="0000-0002-0511-9815")) ) Description: Streamlined data import and export by making assumptions that the user is probably willing to make: 'import()' and 'export()' determine the data structure from the file extension, reasonable defaults are used for data import and export (e.g., 'stringsAsFactors=FALSE'), web-based import is natively supported (including from SSL/HTTPS), compressed files can be read directly without explicit decompression, and fast import packages are used where appropriate. An additional convenience function, 'convert()', provides a simple method for converting between file types. URL: https://github.com/leeper/rio BugReports: https://github.com/leeper/rio/issues Depends: R (>= 2.15.0) Imports: tools, stats, utils, foreign, haven (>= 1.1.2), curl (>= 0.6), data.table (>= 1.9.8), readxl (>= 0.1.1), openxlsx, tibble Suggests: datasets, bit64, testthat, knitr, magrittr, arrow, clipr, feather, fst, hexView, jsonlite, pzfx, readODS (>= 1.6.4), readr, rmarkdown, rmatio, xml2 (>= 1.2.0), yaml License: GPL-2 VignetteBuilder: knitr Encoding: UTF-8 RoxygenNote: 7.1.1 NeedsCompilation: no Packaged: 2021-11-08 13:36:39 UTC; THOMAS Author: Jason Becker [ctb], Chung-hong Chan [aut] (), Geoffrey CH Chan [ctb], Thomas J. Leeper [aut, cre] (), Christopher Gandrud [ctb], Andrew MacDonald [ctb], Ista Zahn [ctb], Stanislaus Stadlmann [ctb], Ruaridh Williamson [ctb], Patrick Kennedy [ctb], Ryan Price [ctb], Trevor L Davis [ctb], Nathan Day [ctb], Bill Denney [ctb] (), Alex Bokov [ctb] () Maintainer: Thomas J. 
Leeper Repository: CRAN Date/Publication: 2021-11-22 07:40:02 UTC rio/build/0000755000176200001440000000000014142223745012146 5ustar liggesusersrio/build/vignette.rds0000644000176200001440000000031614142223745014505 0ustar liggesusersb```b`a@&0`b fd`aEzA)h¢y%E)%y % @uh8@z3JrsYfm B$bt׸楀aީE0=(jؠjX2sRad9.nP&c0Gq?gQ~nݣ9JI,IK+rio/tests/0000755000176200001440000000000012642247627012221 5ustar liggesusersrio/tests/test-all.R0000644000176200001440000000011312642247627014064 0ustar liggesuserslibrary("testthat") library("rio") test_check("rio", reporter = "summary") rio/tests/testthat/0000755000176200001440000000000014014456223014046 5ustar liggesusersrio/tests/testthat/test_format_dta.R0000644000176200001440000000300614014451346017350 0ustar liggesuserscontext("Stata (.dta) imports/exports") require("datasets") test_that("Export to Stata", { expect_true(export(mtcars, "mtcars.dta") %in% dir()) mtcars3 <- mtcars names(mtcars3)[1] <- "foo.bar" expect_error(export(mtcars3, "mtcars3.dta"), label = "Export fails on invalid Stata names") }) test_that("Import from Stata (read_dta)", { expect_true(is.data.frame(import("mtcars.dta", haven = TRUE))) # arguments ignored expect_warning(import("mtcars.dta", haven = TRUE, extraneous.argument = TRUE)) # tell arg_reconcile() to not show warnings expect_silent(import("mtcars.dta", haven = TRUE, extraneous.argument = TRUE, .warn = FALSE)) }) test_that("Import from Stata with extended Haven features (read_dta)", { expect_true(is.data.frame(mtcars_wtam <- import("mtcars.dta", haven = TRUE, col_select = c('wt', 'am'), n_max = 10))) expect_identical(c(10L,2L), dim(mtcars_wtam)) expect_identical(c('wt', 'am'), names(mtcars_wtam)) }) test_that("Import from Stata (read.dta)", { test1 <- try(import("http://www.stata-press.com/data/r12/auto.dta", haven = FALSE)) if (!inherits(test1, "try-error")) { expect_true(inherits(test1, "data.frame")) } expect_error( is.data.frame(import("mtcars.dta", haven = FALSE)), label = "foreign::read.dta cannot read 
newer Stata files" ) }) unlink("mtcars.dta") rio/tests/testthat/test_format_arff.R0000644000176200001440000000041512660645506017527 0ustar liggesuserscontext("Weka (.arff) imports/exports") require("datasets") test_that("Export to Weka", { expect_true(export(iris, "iris.arff") %in% dir()) }) test_that("Import from Weka", { expect_true(is.data.frame(import("iris.arff"))) }) unlink("iris.arff") rio/tests/testthat/test_is_file_text.R0000644000176200001440000000454214142223624017712 0ustar liggesuserscontext("correctly identifying files as text vs binary") require("datasets") txtformats <- c("arff", "csv", "csv2", "dump", "fwf", "psv", "r", "tsv", "txt") binformats <- c("dbf", "dta", "rda", "rdata", "rds", "sas7bdat", "sav", "xlsx", "xpt") names(iris) <- gsub("\\.", "_", names(iris)) test_that("Required text formats recognized as text", { for (xx in txtformats) { expect_true(is_file_text(export(iris, paste0("iris.", xx))), label = paste0(xx, " should be text")) } }) test_that("Required non-text formats recognized as non-text", { for (xx in binformats) { expect_false(is_file_text(export(iris, paste0("iris.", xx))), label = paste0(xx, " should not be text")) } }) test_that("csvy recognized as text", { skip_if_not_installed(pkg = "data.table") expect_true(is_file_text(export(iris, "iris.csvy"))) }) test_that("xml and html recognized as text", { skip_if_not_installed(pkg = "xml2") expect_true(is_file_text(export(iris, "iris.xml"))) expect_true(is_file_text(export(iris, "iris.html"))) }) test_that("json recognized as text", { skip_if_not_installed(pkg = "jsonlite") expect_true(is_file_text(export(iris, "iris.json"))) }) test_that("yml recognized as text", { skip_if_not_installed(pkg = "yaml") expect_true(is_file_text(export(iris, "iris.yml"))) }) test_that("pzfx recognized as text", { skip("failing mysteriously") # skip_if_not_installed(pkg = "pzfx") expect_true(is_file_text(export(iris[,-5], "iris.pzfx"))) }) test_that("matlab recognized as binary", { skip("failing 
mysteriously") # skip_if_not_installed(pkg = "rmatio") expect_false(is_file_text(export(iris, "iris.matlab"))) }) test_that("ods recognized as binary", { skip_if_not_installed(pkg = "readODS") expect_false(is_file_text(export(iris, "iris.ods"))) }) test_that("fst recognized as binary", { skip_if_not_installed(pkg = "fst") expect_false(is_file_text(export(iris, "iris.fst"))) }) test_that("feather recognized as binary", { skip_if_not_installed(pkg = "feather") expect_false(is_file_text(export(iris, "iris.feather"))) }) unlink(paste0("iris.", c(txtformats, binformats, "csvy", "xml", "html", "json", "yml", "pzfx", "matlab", "ods", "fst", "feather"))) rm(iris, txtformats, binformats) rio/tests/testthat/test_format_syd.R0000644000176200001440000000033713577132026017407 0ustar liggesuserscontext("Systat (.syd) imports/exports") test_that("Import from Systat", { skip_if_not_installed("foreign") expect_true(inherits(import(system.file("files/Iris.syd", package="foreign")[1]), "data.frame")) }) rio/tests/testthat/test_format_psv.R0000644000176200001440000000037712660644465017433 0ustar liggesuserscontext("PSV imports/exports") require("datasets") test_that("Export to PSV", { expect_true(export(iris, "iris.psv") %in% dir()) }) test_that("Import from TSV", { expect_true(is.data.frame(import("iris.psv"))) }) unlink("iris.psv") rio/tests/testthat/test_import_list.R0000644000176200001440000000776113577230343017616 0ustar liggesuserscontext("Test import_list()") library("datasets") export(list(mtcars = mtcars, iris = iris), "data.rdata") export(mtcars, "mtcars.rds") test_that("Data identical (import_list)", { expect_equivalent(import_list(rep("mtcars.rds", 2)), list(mtcars, mtcars)) mdat <- rbind(mtcars, mtcars) dat <- import_list(rep("mtcars.rds", 2), rbind = TRUE) expect_true(ncol(dat) == ncol(mdat) + 1) expect_true(nrow(dat) == nrow(mdat)) expect_true("_file" %in% names(dat)) }) test_that("Import multi-object .Rdata in import_list()", { dat <- import_list("data.rdata") 
expect_true(identical(dat[[1]], mtcars)) expect_true(identical(dat[[2]], iris)) }) test_that("Import multiple HTML tables in import_list()", { dat <- import_list(system.file("examples", "twotables.html", package = "rio")) expect_true(identical(dim(dat[[1]]), dim(mtcars))) expect_true(identical(names(dat[[1]]), names(mtcars))) expect_true(identical(dim(dat[[2]]), dim(iris))) expect_true(identical(names(dat[[2]]), names(iris))) }) test_that("import_list() preserves 'which' names when specified", { export(list(a = mtcars, b = iris), "foo.xlsx") expect_true(identical(names(import_list("foo.xlsx")), c("a", "b"))) expect_true(identical(names(import_list("foo.xlsx", which = 1)), "a")) expect_true(identical(names(import_list("foo.xlsx", which = "a")), "a")) expect_true(identical(names(import_list("foo.xlsx", which = 2)), "b")) expect_true(identical(names(import_list("foo.xlsx", which = "b")), "b")) expect_true(identical(names(import_list("foo.xlsx", which = 1:2)), c("a", "b"))) expect_true(identical(names(import_list("foo.xlsx", which = 2:1)), c("b", "a"))) expect_true(identical(names(import_list("foo.xlsx", which = c("a", "b"))), c("a", "b"))) expect_true(identical(names(import_list("foo.xlsx", which = c("b", "a"))), c("b", "a"))) unlink("foo.xlsx") }) test_that("Import single file via import_list()", { expect_true(identical(import_list("mtcars.rds", rbind = TRUE), mtcars)) }) test_that("Import single file from zip via import_list()", { export(mtcars, "mtcars.csv.zip", format = "csv") expect_true(inherits(import_list("mtcars.csv.zip")[[1L]], "data.frame")) expect_true(inherits(import_list("mtcars.csv.zip", which = 1)[[1L]], "data.frame")) expect_true(inherits(import_list("mtcars.csv.zip", which = "mtcars.csv")[[1L]], "data.frame")) }) test_that("Using setclass in import_list()", { dat1 <- import_list(rep("mtcars.rds", 2), setclass = "data.table", rbind = TRUE) expect_true(inherits(dat1, "data.table")) dat2 <- import_list(rep("mtcars.rds", 2), setclass = "tbl", rbind = 
TRUE) expect_true(inherits(dat2, "tbl")) }) test_that("Object names are preserved by import_list()", { export(list(mtcars1 = mtcars[1:10,], mtcars2 = mtcars[11:20,], mtcars3 = mtcars[21:32,]), "mtcars.xlsx") export(mtcars[1:10,], "mtcars1.csv") export(mtcars[11:20,], "mtcars2.tsv") export(mtcars[21:32,], "mtcars3.csv") expected_names <- c("mtcars1", "mtcars2", "mtcars3") dat_xls <- import_list("mtcars.xlsx") dat_csv <- import_list(c("mtcars1.csv","mtcars2.tsv","mtcars3.csv")) expect_identical(names(dat_xls), expected_names) expect_identical(names(dat_csv), expected_names) unlink(c("mtcars.xlsx", "mtcars1.csv","mtcars2.tsv","mtcars3.csv")) }) test_that("File names are added as attributes by import_list()", { export(mtcars[1:10,], "mtcars.csv") export(mtcars[11:20,], "mtcars.tsv") expected_names <- c("mtcars", "mtcars") expected_attrs <- c(mtcars = "mtcars.csv", mtcars = "mtcars.tsv") dat <- import_list(c("mtcars.csv","mtcars.tsv")) expect_identical(names(dat), expected_names) expect_identical(unlist(lapply(dat, attr, "filename")), expected_attrs) unlink(c("mtcars.csv", "mtcars.tsv")) }) unlink("data.rdata") unlink("mtcars.rds") unlink("mtcars.csv.zip")rio/tests/testthat/test_format_sas.R0000644000176200001440000000072113136145224017366 0ustar liggesuserscontext("SAS imports/exports") require("datasets") #test_that("Import from SAS (.sas7bdat)", {}) test_that("Export SAS (.xpt)", { expect_true(export(mtcars, "mtcars.xpt") %in% dir()) }) test_that("Import from SAS (.xpt)", { expect_true(inherits(import("mtcars.xpt"), "data.frame")) }) test_that("Export SAS (.sas7bdat)", { expect_true(export(mtcars, "mtcars.sas7bdat") %in% dir()) }) unlink("mtcars.sas7bdat") unlink("mtcars.xpt") rio/tests/testthat/test_format_feather.R0000644000176200001440000000055113535446305020226 0ustar liggesuserscontext("feather imports/exports") require("datasets") test_that("Export to feather", { skip_if_not_installed(pkg="feather") expect_true(export(iris, "iris.feather") %in% dir()) }) 
test_that("Import from feather", { skip_if_not_installed(pkg="feather") expect_true(is.data.frame(import("iris.feather"))) }) unlink("iris.feather") rio/tests/testthat/test_format_pzfx.R0000644000176200001440000000131514142223677017576 0ustar liggesuserscontext("pzfx imports/exports") require("datasets") iris_numeric <- iris iris_numeric$Species <- as.numeric(iris_numeric$Species) test_that("Export to pzfx", { skip_if_not_installed(pkg="pzfx") expect_true(export(iris_numeric, "iris.pzfx") %in% dir()) }) test_that("Import from pzfx", { skip_if_not_installed(pkg="pzfx") expect_true(is.data.frame(import("iris.pzfx"))) # Note that the dim test is only true as long as the data are exported with # write_pzfx(..., row_names=FALSE) which is the default in the export # method, but it is not default in pzfx::write_pzfx() expect_true(identical(dim(import("iris.pzfx")), dim(iris_numeric))) }) rm(iris_numeric) unlink("iris.pzfx") rio/tests/testthat/test_format_eviews.R0000644000176200001440000000031413535446305020107 0ustar liggesuserscontext("EViews import") test_that("Import from EViews", { skip_if_not_installed(pkg="hexView") expect_true(is.data.frame(suppressWarnings(import(hexView::hexViewFile("data4-1.wf1"))))) }) rio/tests/testthat/test_guess.R0000644000176200001440000000163013121277274016363 0ustar liggesuserscontext("Get File Extension") library("datasets") test_that("File extension converted correctly", { expect_that(get_ext("hello.csv"), equals("csv")) expect_that(get_ext("hello.CSV"), equals("csv")) expect_that(get_ext("hello.sav.CSV"), equals("csv")) expect_that(get_ext("clipboard"), equals("clipboard")) expect_error(get_ext(1L)) }) test_that("Format converted correctly", { expect_that(get_type(","), equals("csv")) expect_that(get_type(";"), equals("csv2")) expect_that(get_type("|"), equals("psv")) expect_that(get_type("\t"), equals("tsv")) expect_that(get_type("excel"), equals("xlsx")) expect_that(get_type("stata"), equals("dta")) expect_that(get_type("spss"), 
equals("sav")) expect_that(get_type("sas"), equals("sas7bdat")) }) test_that("Export without file specified", { expect_true(export(iris, format = "csv") %in% dir()) unlink("iris.csv") }) rio/tests/testthat/test_identical.R0000644000176200001440000000435413535446305017201 0ustar liggesuserscontext("Check Data Identical") test_that("Data identical (text formats)", { expect_equivalent(import(export(mtcars, "mtcars.txt")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.csv")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.tsv")), mtcars) }) unlink("mtcars.txt") unlink("mtcars.csv") unlink("mtcars.tsv") test_that("Data identical (R formats)", { expect_equivalent(import(export(mtcars, "mtcars.rds")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.R")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.RData")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.R", format = "dump")), mtcars) }) unlink("mtcars.rds") unlink("mtcars.R") unlink("mtcars.RData") test_that("Data identical (R formats), feather", { skip_if_not_installed(pkg="feather") expect_equivalent(import(export(mtcars, "mtcars.feather")), mtcars) unlink("mtcars.feather") }) test_that("Data identical (haven formats)", { expect_equivalent(import(export(mtcars, "mtcars.dta")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.sav")), mtcars) }) unlink("mtcars.dta") unlink("mtcars.sav") test_that("Data identical (Excel formats)", { expect_equivalent(import(export(mtcars, "mtcars.xlsx")), mtcars) }) unlink("mtcars.xlsx") test_that("Data identical (other formats)", { expect_equivalent(import(export(mtcars, "mtcars.dbf")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.json")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.arff")), mtcars) expect_equivalent(import(export(mtcars, "mtcars.xml")), mtcars) }) unlink("mtcars.dbf") unlink("mtcars.json") unlink("mtcars.arff") unlink("mtcars.xml") test_that("Data identical (optional arguments)", { 
#expect_equivalent(import(export(mtcars, "mtcars.csv", format = "csv2"), format = "csv2"), mtcars) expect_equivalent(import(export(mtcars, "mtcars.csv"), nrows = 4), mtcars[1:4,]) expect_equivalent(import(export(mtcars, "mtcars.csv", format = "tsv"), format = "tsv"), mtcars) expect_true(all.equal(import(export(mtcars, "mtcars", format = "csv"), format = "csv"), mtcars, check.attributes = FALSE)) }) unlink("mtcars.csv") unlink("mtcars") rio/tests/testthat/test_format_sav.R0000644000176200001440000000522214015023602017362 0ustar liggesuserscontext("SPSS (.sav) imports/exports") require("datasets") mtcars2 <- mtcars # label and value labels mtcars2[["cyl"]] <- factor(mtcars2[["cyl"]], c(4, 6, 8), c("four", "six", "eight")) attr(mtcars2[["cyl"]], "label") <- "cylinders" # value labels only mtcars2[["am"]] <- factor(mtcars2[["am"]], c(0, 1), c("automatic", "manual")) # variable label only attr(mtcars2[["mpg"]], "label") <- "miles per gallon" test_that("Export to SPSS (.sav)", { expect_true(export(mtcars2, "mtcars.sav") %in% dir()) }) test_that("Export to SPSS compressed (.zsav)", { expect_true(export(mtcars2, "mtcars.zsav") %in% dir()) }) #test_that("Import from SPSS (.sav; read.spss)", { # expect_true(is.data.frame(import("mtcars2.sav", haven = FALSE))) #}) test_that("Import from SPSS (.sav; read_sav)", { expect_true(d <- is.data.frame(import("mtcars.sav", haven = TRUE))) expect_true(!"labelled" %in% unlist(lapply(d, class))) rm(d) }) test_that("Import from SPSS (.zsav; read_sav)", { expect_true(d <- is.data.frame(import("mtcars.zsav"))) expect_true(!"labelled" %in% unlist(lapply(d, class))) rm(d) }) test_that("Variable label and value labels preserved on SPSS (.sav) roundtrip", { d <- import("mtcars.sav") a_cyl <- attributes(d[["cyl"]]) expect_true("label" %in% names(a_cyl)) expect_true("labels" %in% names(a_cyl)) expect_true(identical(a_cyl[["label"]], "cylinders")) expect_true(identical(a_cyl[["labels"]], stats::setNames(c(1.0, 2.0, 3.0), c("four", "six", 
"eight")))) a_am <- attributes(d[["am"]]) expect_true("labels" %in% names(a_am)) expect_true(identical(a_am[["labels"]], stats::setNames(c(1.0, 2.0), c("automatic", "manual")))) a_mpg <- attributes(d[["mpg"]]) expect_true("label" %in% names(a_mpg)) expect_true(identical(a_mpg[["label"]], "miles per gallon")) }) test_that("Variable label and value labels preserved on SPSS compressed (.zsav) roundtrip", { d <- import("mtcars.zsav") a_cyl <- attributes(d[["cyl"]]) expect_true("label" %in% names(a_cyl)) expect_true("labels" %in% names(a_cyl)) expect_true(identical(a_cyl[["label"]], "cylinders")) expect_true(identical(a_cyl[["labels"]], stats::setNames(c(1.0, 2.0, 3.0), c("four", "six", "eight")))) a_am <- attributes(d[["am"]]) expect_true("labels" %in% names(a_am)) expect_true(identical(a_am[["labels"]], stats::setNames(c(1.0, 2.0), c("automatic", "manual")))) a_mpg <- attributes(d[["mpg"]]) expect_true("label" %in% names(a_mpg)) expect_true(identical(a_mpg[["label"]], "miles per gallon")) }) unlink("mtcars.sav") unlink("mtcars.zsav")rio/tests/testthat/test_format_dbf.R0000644000176200001440000000064413577132026017344 0ustar liggesuserscontext("XBASE (.dbf) imports/exports") require("datasets") test_that("Export to XBASE (.dbf)", { skip_if_not_installed("foreign") expect_true(export(iris, "iris.dbf") %in% dir()) }) test_that("Import from XBASE (.dbf)", { skip_if_not_installed("foreign") d <- import("iris.dbf") expect_true(is.data.frame(d)) expect_true(!"factor" %in% sapply(d, class)) }) unlink("iris.dbf") rio/tests/testthat/test_format_parquet.R0000644000176200001440000000051613653614607020275 0ustar liggesuserscontext("Parquet imports/exports") require("datasets") test_that("Export to and import from parquet", { skip_if_not_installed("arrow") skip_if_not(arrow::arrow_available()) expect_true(export(iris, "iris.parquet") %in% dir()) expect_true(is.data.frame(import("iris.parquet"))) unlink("iris.parquet") }) 
rio/tests/testthat/test_format_dif.R0000644000176200001440000000032713106305755017350 0ustar liggesuserscontext("DIF imports/exports") test_that("Import from DIF", { dd <- import(file.path(system.file("misc", package = "utils"), "exDIF.dif"), transpose = TRUE) expect_true(inherits(dd, "data.frame")) }) rio/tests/testthat/test_format_matlab.R0000644000176200001440000000075414141525663020054 0ustar liggesuserscontext("rmatio imports/exports") require("datasets") test_that("Export to matlab", { skip("failing mysteriously") # skip_if_not_installed(pkg="rmatio") expect_true(export(iris, "iris.matlab") %in% dir()) }) test_that("Import from matlab", { skip("failing mysteriously") # skip_if_not_installed(pkg="rmatio") expect_true(is.data.frame(import("iris.matlab"))) expect_true(identical(dim(import("iris.matlab")), dim(iris))) }) unlink("iris.matlab") rio/tests/testthat/files/0000755000176200001440000000000014014451034015143 5ustar liggesusersrio/tests/testthat/files/two-tbody.html0000644000176200001440000000026114014451034017760 0ustar liggesusers
DatasetDescription
COComments
rio/tests/testthat/files/br-in-td.html0000644000176200001440000000025714014451034017451 0ustar liggesusers
RowSTUDYID
1
2
rio/tests/testthat/files/br-in-header.html0000644000176200001440000000023714014451034020270 0ustar liggesusers

Date
1December 15, 2003 13:14:17.123
rio/tests/testthat/files/th-as-row-element.html0000644000176200001440000000025514014451034021303 0ustar liggesusers
RowSTUDYID
1ABC
2ABC
rio/tests/testthat/test_export_list.R0000644000176200001440000000363513577230343017621 0ustar liggesuserscontext("Test export_list()") library("datasets") export(list(mtcars3 = mtcars[1:10,], mtcars2 = mtcars[11:20,], mtcars1 = mtcars[21:32,]), "mtcars.xlsx") mylist <- import_list("mtcars.xlsx") test_that("export_list() works", { expect_error(export_list(mtcars), label = "export_list() fails on exporting single data frame") expect_error(export_list(mylist, file = NULL), label = "export_list() fails when file is NULL") expect_true(identical(export_list(mylist, file = paste0("mtcars_", 3:1, ".csv")), paste0("mtcars_", 3:1, ".csv"))) expect_true(identical(export_list(mylist, file = "%s.csv"), paste0("mtcars", 3:1, ".csv"))) expect_true(all.equal(mylist[["mtcars1"]], import("mtcars1.csv"))) expect_true(all.equal(mylist[["mtcars2"]], import("mtcars2.csv"))) expect_true(all.equal(mylist[["mtcars3"]], import("mtcars3.csv"))) names(mylist) <- NULL expect_true(identical(export_list(mylist, file = "mtcars_%s.csv"), paste0("mtcars_", 1:3, ".csv"))) names(mylist) <- c("a", "", "c") expect_error(export_list(mylist), label = "export_list() fails without 'file' argument") expect_error(export_list(mylist, file = "%.csv"), label = "export_list() fails without missing names") expect_error(export_list(mylist, file = c("a.csv", "b.csv")), label = "export_list() fails with mismatched argument lengths") names(mylist) <- c("a", "a", "c") expect_error(export_list(mylist, file = "mtcars_%s.csv"), label = "export_list() fails with duplicated data frame names") expect_error(export_list(mylist, file = c("mtcars1.csv", "mtcars1.csv", "mtcars3.csv")), label = "export_list() fails with duplicated data frame names") }) unlink("mtcars.xlsx") unlink("mtcars1.csv") unlink("mtcars2.csv") unlink("mtcars3.csv") unlink("mtcars_1.csv") unlink("mtcars_2.csv") unlink("mtcars_3.csv") unlink("a.csv") unlink("b.csv") rio/tests/testthat/test_errors.R0000644000176200001440000000267513435247060016561 0ustar 
liggesuserscontext("Errors") library("datasets") test_that("Function suggestions for unsupported export", { expect_error(export(data.frame(1), "test.jpg"), "jpg format not supported. Consider using the 'jpeg::writeJPEG()' function", fixed = TRUE) }) test_that("Error for unsupported file types", { writeLines("123", con = "test.faketype") expect_error(import("test.faketype"), "Format not supported") expect_error(export(mtcars, "mtcars.faketype"), "Format not supported") expect_equal(get_type("faketype"), "faketype") expect_error(get_ext("noextension"), "'file' has no extension") unlink("test.faketype") }) test_that("Error for mixed support file types", { expect_error(import("test.por"), "No such file") expect_error(export(mtcars, "mtcars.por"), "Format not supported") expect_error(export(mtcars, "mtcars.faketype"), "Format not supported") }) test_that("Only export data.frame or matrix", { expect_error(export(1, "test.csv"), "'x' is not a data.frame or matrix") }) test_that("Column widths printed for fixed-width format", { expect_true(is.character(export(data.frame(1), "test.txt", format = "fwf", verbose = FALSE))) expect_message(export(data.frame(1), "test.txt", format = "fwf", verbose = TRUE)) unlink("test.txt") }) test_that("Warning for import_list() with missing file", { expect_warning(import_list("fake_file.csv")) }) rio/tests/testthat/test_install_formats.R0000644000176200001440000000130013324627613020431 0ustar liggesuserscontext("Install uninstalled formats") test_that("uninstalled_formats()", { skip_on_cran() formats <- uninstalled_formats() if (is.null(formats)) { expect_true(install_formats()) } else { expect_type(formats, "character") } }) test_that("install_formats()", { suggestions <- read.dcf(system.file("examples/example-DESCRIPTION", package = "rio", mustWork = TRUE), fields = "Suggests") suggestions <- parse_suggestions(suggestions) expect_true("NANTUCKET" %in% suggestions) expect_true("readODS" %in% suggestions) expect_false("devtools" %in% 
suggestions) }) rio/tests/testthat/test_extensions.R0000644000176200001440000000126413106304323017424 0ustar liggesuserscontext("Extensions") library("datasets") test_that("S3 extension mechanism works for imports", { write.csv(iris, 'iris.custom') expect_error(import("iris.custom")) .import.rio_custom <- function(file, ...){ read.csv(file, ...) } #expect_true(is.data.frame(import('iris.custom'))) rm(.import.rio_custom) }) test_that("S3 extension mechanism works for exports", { expect_error(export("iris.custom")) .export.rio_custom <- function(file, data, ...){ write.csv(data, file, ...) invisible(file) } expect_error(is.character(export(iris, "iris.custom"))) rm(.export.rio_custom) }) unlink("iris.custom") rio/tests/testthat/test_format_fwf.R0000644000176200001440000000322414141506404017361 0ustar liggesuserscontext("FWF imports/exports") require("datasets") test_that("Export to FWF", { expect_true(export(iris, "iris.fwf") %in% dir()) expect_true(export(iris, "iris.txt", format = "fwf") %in% dir()) }) test_that("Import from FWF (read.fwf)", { expect_true(is.data.frame(import("iris.fwf", widths = c(3,3,3,3,1)))) expect_true(is.data.frame(import("iris.fwf", widths = list(c(3,3,3,3,1))))) expect_true(is.data.frame(import("iris.fwf", widths = c(3,3,3,3,1), col.names = names(iris)))) expect_true(is.data.frame(import("iris.fwf", widths = c(3,3,3,3,1), col.names = names(iris), readr = TRUE))) expect_true(is.data.frame(import("iris.txt", widths = c(3,3,3,3,1), format = "fwf"))) }) test_that("Import from FWF (read_fwf)", { expect_true(is.data.frame(import("iris.fwf", widths = c(3,3,3,3,1), readr = TRUE))) expect_true(is.data.frame(import("iris.txt", widths = c(3,3,3,3,1), format = "fwf", readr = TRUE))) # negative column widths expect_true(is.data.frame(import("iris.fwf", widths = c(-3,3,3,3,1), readr = FALSE))) expect_true(is.data.frame(import("iris.fwf", widths = c(-3,3,3,3,1), readr = TRUE))) }) test_that("Import from FWF Errors", { expect_error(import("iris.fwf"), 
"Import of fixed-width format data requires a 'widths' argument. See ? read.fwf().", fixed = TRUE) # error on NULL widths expect_error(import("iris.fwf", widths = NULL, readr = FALSE)) # no error on NULL widths w/ readr::read_fwf() expect_true(suppressWarnings(is.data.frame(import("iris.fwf", widths = NULL, readr = TRUE)))) }) unlink("iris.fwf") unlink("iris.txt") rio/tests/testthat/test_gather_attrs.R0000644000176200001440000000350613577230343017731 0ustar liggesuserscontext("Gather attrs") e <- try(import("http://www.stata-press.com/data/r13/auto.dta")) if (!inherits(e, "try-error")) { g <- gather_attrs(e) test_that("Gather attrs from Stata", { expect_true(length(attributes(e[[1]])) >= 1) expect_true(length(attributes(g[[1]])) == 0) expect_true(length(attributes(e)) == 5) expect_true(length(attributes(g)) == 8) expect_true("label" %in% names(attributes(e[[1]]))) expect_true(!"label" %in% names(attributes(g[[1]]))) expect_true("label" %in% names(attributes(g))) expect_true("labels" %in% names(attributes(g))) expect_true("format.stata" %in% names(attributes(g))) expect_true(!"format.stata" %in% names(attributes(g[[1]]))) }) test_that("Spread attrs from Stata", { s <- spread_attrs(g) # df-level attributes expect_true("label" %in% names(attributes(s))) expect_true("notes" %in% names(attributes(s))) # spread attributes expect_true("format.stata" %in% names(attributes(s[[1]]))) expect_true(!"format.stata" %in% names(attributes(s))) expect_true("label" %in% names(attributes(s[[1]]))) expect_true(!"labels" %in% names(attributes(s))) }) test_that("Gather empty attributes", { require("datasets") g <- gather_attrs(iris) expect_true(length(attributes(iris[[1]])) == 0) expect_true(length(attributes(g[[1]])) == 0) expect_true(length(attributes(iris)) == 3) expect_true(length(attributes(g)) == 3) }) test_that("gather_attrs() fails on non-data frame", { expect_error(gather_attrs(letters)) }) test_that("spread_attrs() fails on non-data frame", { 
expect_error(spread_attrs(letters)) }) rm(e) } rio/tests/testthat/test_arg_reconcile.R0000644000176200001440000002324614014451034020026 0ustar liggesuserscontext("Reconcile user-supplied arguments with target function's call signature") require("datasets") require("tools") require("tibble") sharedargs <- alist(file = "iris.tsv", x = iris, sep = "\t", fileEncoding = "UTF-8", showProgress = FALSE, skip = 2, n_max = 4, stringsAsFactors = TRUE) fwrite_args0 <- alist(file = "iris.tsv", x = iris, sep = "\t", showProgress = FALSE) writetable_args0 <- alist(file = "iris.tsv", x = iris, sep = "\t", fileEncoding = "UTF-8") dta_args0 <- alist(data = iris, path = "iris.dta") sav_args0 <- alist(data = iris, path = "iris.sav") iris <- setNames(iris, sub(".", "", names(iris), fixed = TRUE)) export(iris, "iris_ref.tsv") export(iris, "iris_ref.dta") export(iris, "iris_ref.sav") test_that("hardcoded reference arguments are valid", { expect_silent(do.call(data.table::fwrite, fwrite_args0)) expect_silent(do.call(haven::write_dta, dta_args0)) }) test_that("remove duplicated arguments", { expect_identical(alist(file='iris.tsv', x = iris, sep = '\t', fileEncoding = 'UTF-8'), arg_reconcile(utils::write.table, file = "iris.tsv", x = iris, sep = "\t", fileEncoding = "UTF-8", sep = '\t')) }) test_that("warn on mismatched args and filter them out", { expect_warning(arg_reconcile(data.table::fwrite, file = "iris.tsv", x = iris, sep = "\t", fileEncoding = "UTF-8", showProgress = FALSE, skip = 2, n_max = 4), "fileEncoding") expect_warning(arg_reconcile(utils::write.table, file = "iris.tsv", x = iris, sep = "\t", fileEncoding = "UTF-8", showProgress = FALSE, skip = 2, n_max = 4), "showProgress") expect_warning(arg_reconcile(data.table::fwrite, .args = sharedargs), "fileEncoding") expect_warning(arg_reconcile(utils::write.table, .args = sharedargs), "showProgress") }) test_that("The whitelist and blacklist work", { expect_equal(nrow(iris) - sharedargs$skip, nrow(arg_reconcile(data.table::fread, 
.warn = FALSE, .args = sharedargs, .docall = TRUE, header = TRUE ))) expect_equal(nrow(iris), nrow(arg_reconcile(data.table::fread, .warn = FALSE, .args = sharedargs, .exclude = 'skip', .docall = TRUE, header = TRUE))) expect_equal(nrow(iris), nrow(arg_reconcile(data.table::fread, .warn = FALSE, .args = sharedargs, .include = c('file', 'sep', 'stringsAsFactors', 'showProgress'), .docall = TRUE, header = TRUE))) }) test_that("valid outputs with suppressed warnings", { expect_silent(fwrite_args1 <- arg_reconcile(data.table::fwrite, file = "iris.tsv", x = iris, sep = "\t", fileEncoding = "UTF-8", showProgress = FALSE, skip = 2, n_max = 4, .warn = FALSE)) expect_identical(fwrite_args0, fwrite_args1) expect_silent(writetable_args1 <- arg_reconcile(utils::write.table, file = "iris.tsv", x = iris, sep = "\t", fileEncoding = "UTF-8", showProgress = FALSE, skip = 2, n_max = 4, .warn = FALSE)) expect_identical(writetable_args0, writetable_args1) expect_silent(fwrite_args1 <- arg_reconcile(data.table::fwrite, .args = sharedargs, .warn = FALSE)) expect_identical(fwrite_args0, fwrite_args1) expect_silent(writetable_args1 <- arg_reconcile(utils::write.table, .args = sharedargs, .warn = FALSE)) expect_identical(writetable_args0, writetable_args1) }) test_that(".remap argument remaps argument names", { expect_warning(dta_args1 <- arg_reconcile(haven::write_dta, file = "iris.dta", x = iris, sep= "\t", fileEncoding = "UTF-8", showProgress = FALSE, skip = 2, n_max = 4, .remap = list(x = "data", file = "path")) ,"sep") expect_identical(dta_args0, dta_args1) expect_warning(sav_args1 <- arg_reconcile(haven::write_sav, file = "iris.sav", x = iris, sep = "\t", fileEncoding = "UTF-8", showProgress = FALSE, skip = 2, n_max = 4, .remap = list(x = "data", file = "path")) ,"sep") expect_identical(sav_args0, sav_args1) expect_warning(dta_args1 <- arg_reconcile(haven::write_dta, file = "iris.dta", .args = sharedargs, .remap = list(x = "data", file = "path")), "sep") expect_identical(dta_args0, 
dta_args1) expect_warning(sav_args1 <- arg_reconcile(haven::write_sav, file = "iris.sav", .args = sharedargs, .remap = list(x = "data", file = "path")), "sep") expect_identical(sav_args0, sav_args1) }) test_that(".docall works", { expect_equivalent(iris[1:4,], arg_reconcile(data.table::fread, file = "iris_ref.tsv", .warn = FALSE, .args = sharedargs, .docall = TRUE, .exclude = 'skip', .remap = list(n_max = "nrows"))) expect_equivalent(iris[1:4,], arg_reconcile(utils::read.table, file = "iris_ref.tsv", .warn = FALSE, .args = sharedargs, .docall = TRUE, .exclude = 'skip', header = TRUE, .remap = list(n_max = "nrows"))) expect_equivalent(iris[1:4,], arg_reconcile(utils::read.table, file = "iris_ref.tsv", .warn = FALSE, .args = sharedargs, .docall = TRUE, .exclude = 'skip', header = TRUE, .remap = list(n_max = "nrows"))) expect_equivalent(iris[3:6,1:4], arg_reconcile(haven::read_dta, file = "iris_ref.dta", .warn = FALSE, .args = sharedargs, .docall = TRUE)[,1:4]) expect_equivalent(iris[3:6,1:4], arg_reconcile(haven::read_sav, file = "iris_ref.sav", .warn = FALSE, .args = sharedargs, .docall = TRUE)[,1:4]) }) test_that("Error handling and silent return of customized error objects to avoid interrupting long-running processes for individual errors.", { expect_error(arg_reconcile(data.table::fread, file = 'iris_ref.xyz', .warn = FALSE, .args = sharedargs, .docall = TRUE, .remap = list(n_max = 'nrows')), 'does not exist or is non-readable') errobj <- arg_reconcile(data.table::fread, file = 'iris_ref.xyz', .warn = FALSE, .args = sharedargs, .docall = TRUE, .remap = list(n_max = 'nrows'), .error = data.table::data.table(NULL)) expect_s3_class(errobj, c('data.table', 'data.frame')) expect_identical(nrow(errobj), 0L) expect_s3_class(attr(errobj, 'error'), 'try-error') }) # TODO: .docall produces results identical to corresponding direct invokation # TODO: do.call on *_args0 produces results identical to corresponding direct invokation # cleanup unlink(c("iris_ref.*", "iris.tsv", 
"iris.dat", "iris.sav")) rio/tests/testthat/test_format_ods.R0000644000176200001440000000245514015023117017364 0ustar liggesuserscontext("ODS imports/exports") require("datasets") test_that("Import from ODS", { skip_if_not_installed(pkg="readODS") ods0 <- import(system.file("examples", "mtcars.ods", package = "rio")) expect_warning(ods <- import(system.file("examples", "mtcars.ods" , package = "rio"), sheet = 1, col_names = TRUE, path = 'ignored value', invalid_argument = 42), "The following arguments were ignored for read_ods:\ninvalid_argument, path", label = "ODS import ignores redundant and unknown arguments with a warning") expect_identical(ods0, ods, label = "ODS import ignored arguments don't affect output") expect_true(is.data.frame(ods), label = "ODS import returns data.frame") expect_true(identical(names(mtcars), names(ods)), label = "ODS import returns correct names") expect_true(identical(dim(mtcars), dim(ods)), label = "ODS import returns correct dimensions") expect_equivalent(ods, mtcars, label = "ODS import returns correct values") }) test_that("Export to ODS", { skip_if_not_installed(pkg="readODS") expect_true(export(iris, "iris.ods") %in% dir()) }) unlink("iris.ods") rio/tests/testthat/test_format_html.R0000644000176200001440000000333514014456040017545 0ustar liggesuserscontext("HTML imports/exports") require("datasets") test_that("Export to HTML", { skip_if_not_installed("xml2") expect_true(export(iris, "iris.html") %in% dir(), label = "export to html works") }) test_that("Export to HTML with ampersands",{ skip_if_not_installed("xml2") iris$`R & D` <- paste(sample(letters,nrow(iris),rep=T), '&', sample(LETTERS,nrow(iris),rep=TRUE)) expect_true(export(iris, "iris2.html") %in% dir(), label = "export to html with ampersands works") }) test_that("Import from HTML", { skip_if_not_installed("xml2") expect_true(is.data.frame(import("iris.html")), label = "import from single-table html works") f <- system.file("examples", "twotables.html", package = 
"rio") expect_true(all(dim(import(f, which = 1)) == c(32, 11)), label = "import from two-table html works (which = 1)") expect_true(all(dim(import(f, which = 2)) == c(150, 5)), label = "import from two-table html works (which = 2)") }) test_that("Import from HTML with multiple tbody elements", { skip_if_not_installed("xml2") expect_true(is.data.frame(import("files/two-tbody.html")), label="import with two tbody elements in a single html table works") expect_true(is.data.frame(import("files/br-in-header.html")), label="import with an empty header cell in an html table works") expect_true(is.data.frame(import("files/br-in-td.html")), label="import with an empty data cell in a single html table works") expect_true(is.data.frame(import("files/th-as-row-element.html")), label="import with th instead of td in a non-header row in a single html table works") }) unlink(c("iris.xml", "iris2.xml", "iris2.html")) rio/tests/testthat/test_format_xml.R0000644000176200001440000000154314142032523017376 0ustar liggesuserscontext("XML imports/exports") require("datasets") test_that("Export to XML", { skip_if_not_installed("xml2") skip("temporarily skipping (https://github.com/r-lib/xml2/issues/339)") expect_true(export(iris, "iris.xml") %in% dir())}) test_that("Export to XML with ampersands",{ skip_if_not_installed("xml2") skip("temporarily skipping (https://github.com/r-lib/xml2/issues/339)") iris$`R & D` <- paste(sample(letters,nrow(iris),rep=T), '&', sample(LETTERS,nrow(iris),rep=TRUE)) expect_true(export(iris, "iris2.xml") %in% dir()) }) test_that("Import from XML", { skip_if_not_installed("xml2") skip("temporarily skipping (https://github.com/r-lib/xml2/issues/339)") expect_true(is.data.frame(import("iris.xml"))) }) unlink(c("iris.xml","iris2.xml")) rio/tests/testthat/test_format_yml.R0000644000176200001440000000072414014451766017413 0ustar liggesuserscontext("YAML imports/exports") require("datasets") test_that("Export to YAML", { skip_if_not_installed("yaml") 
expect_true(export(iris, "iris.yml") %in% dir()) }) test_that("Import from YAML", { skip_if_not_installed("yaml") expect_true(is.data.frame(import("iris.yml"))) expect_identical(import("iris.yml")[, 1:4], iris[, 1:4]) expect_identical(import("iris.yml")$Species, as.character(iris$Species)) }) unlink("iris.yml") rio/tests/testthat/test_matrix.R0000644000176200001440000000075212660654204016544 0ustar liggesuserscontext("Matrix imports/exports") require("datasets") test_that("Export matrix to CSV", { expect_true(export(warpbreaks, "temp1.csv") %in% dir()) expect_true(export(as.matrix(warpbreaks), "temp2.csv") %in% dir()) }) test_that("Import from matrix export", { expect_true(identical(import("temp1.csv", colClasses = rep("character", 3)), import("temp2.csv", colClasses = rep("character", 3)))) }) unlink("temp1.csv") unlink("temp2.csv") rio/tests/testthat/test_format_xls.R0000644000176200001440000000310214014451034017375 0ustar liggesuserscontext("Excel (xlsx) imports/exports") require("datasets") test_that("Export to Excel (.xlsx)", { expect_true(export(iris, "iris.xlsx") %in% dir()) expect_true(export(mtcars, "iris.xlsx", which = 2) %in% dir()) }) test_that("Import from Excel (.xlsx)", { expect_true(is.data.frame(import("iris.xlsx", readxl = FALSE))) expect_true(is.data.frame(import("iris.xlsx", readxl = TRUE))) expect_true(is.data.frame(import("iris.xlsx", sheet = 1))) expect_true(is.data.frame(import("iris.xlsx", which = 1))) expect_true(nrow(import("iris.xlsx", n_max = 42))==42) expect_warning(is.data.frame(import("iris.xlsx", nrows = 42)), "nrows", label = "xlsx reads the file and ignores unused arguments with warning") }) test_that("Import from Excel (.xls)", { expect_true(is.data.frame(import(system.file('examples', 'iris.xls', package='rio')))) expect_true(is.data.frame(import(system.file('examples', 'iris.xls', package='rio'), sheet = 1))) expect_true(is.data.frame(import(system.file('examples', 'iris.xls', package='rio'), which = 1))) 
expect_warning(is.data.frame(import(system.file('examples', 'iris.xls', package='rio'), which = 1, nrows = 42)), "nrows", label="xls reads the file and ignores unused arguments with warning") }) unlink("iris.xlsx") rio/tests/testthat/test_format_rds.R0000644000176200001440000000133113552621765017401 0ustar liggesuserscontext("Rds imports/exports") require("datasets") test_that("Export to rds", { expect_true(export(iris, "iris.rds") %in% dir()) }) test_that("Import from rds", { expect_true(is.data.frame(import("iris.rds"))) expect_warning(import("iris.rds", invalid_argument=42), "File imported using readRDS. Arguments to '...' ignored.", label="rda imports and ignores unused arguments with a warning") }) test_that("Export to rds (non-data frame)", { expect_true(export(list(1:10, letters), "list.rds") %in% dir()) expect_true(inherits(import("list.rds"), "list")) expect_true(length(import("list.rds")) == 2L) }) unlink("iris.rds") unlink("list.rds") rio/tests/testthat/test_set_class.R0000644000176200001440000000255613121276751017224 0ustar liggesuserscontext("Set object class") library("datasets") mtcars_tibble <- tibble::as_tibble(mtcars) mtcars_datatable <- data.table::as.data.table(mtcars) test_that("Set object class", { expect_true(inherits(set_class(mtcars), "data.frame")) expect_true(inherits(set_class(mtcars_tibble), "data.frame")) expect_true(inherits(set_class(mtcars_datatable), "data.frame")) expect_true(inherits(set_class(mtcars, class = "fakeclass"), "data.frame")) expect_true(!"fakeclass" %in% class(set_class(mtcars, class = "fakeclass"))) }) test_that("Set object class as tibble", { expect_true(inherits(set_class(mtcars, class = "tbl_df"), "tbl_df")) expect_true(inherits(set_class(mtcars, class = "tibble"), "tbl_df")) expect_true(inherits(set_class(mtcars_tibble, class = "tibble"), "tbl_df")) }) test_that("Set object class as data.table", { expect_true(inherits(set_class(mtcars, class = "data.table"), "data.table")) export(mtcars, "mtcars.csv") 
expect_true(inherits(import("mtcars.csv", data.table = TRUE), "data.table")) expect_true(inherits(import("mtcars.csv", setclass = "data.table"), "data.table")) expect_true(inherits(import("mtcars.csv", data.table = TRUE, setclass = "data.table"), "data.table")) expect_warning(import("mtcars.csv", data.table = TRUE, setclass = "data.frame")) unlink("mtcars.csv") }) rio/tests/testthat/test_characterize.R0000644000176200001440000000222514014455065017700 0ustar liggesuserscontext("characterize()/factorize()") x <- structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3)) xdf <- data.frame(v1 = structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3), label = "variable 1"), v2 = structure(c(1,0,0,1), labels = c("foo" = 0, "bar" = 1)), v3 = 4:1, label = "variable 2") test_that("test characterize.default()", { expect_true(identical(characterize(x), c(LETTERS[1:3], NA))) }) test_that("test characterize.default()", { expect_true(identical(characterize(xdf), {xdf[] <- lapply(xdf, characterize); xdf})) }) test_that("test factorize.data.frame()", { expect_true(identical(factorize(x), factor(x, attributes(x)$labels, names(attributes(x)$labels)))) }) test_that("test factorize.data.frame()", { expect_true(identical(factorize(xdf), {xdf[] <- lapply(xdf, factorize); xdf})) }) test_that("test factorize coerce_character", { expect_true(identical(letters[1:3], factorize(letters[1:3]))) expect_true( identical( factorize(letters[3:1], coerce_character = TRUE), factor(letters[3:1], levels = letters[1:3]) ) ) })rio/tests/testthat/test_format_external_packages.R0000644000176200001440000000137713376607613022303 0ustar liggesuserscontext("External package Import/Export support") require("datasets") test_that("External read functions without an import method", { extensions <- c("bib", "bmp", "gexf", "gnumeric", "jpeg", "jpg", "npy", "png", "sss", "sdmx", "tiff") for (ext in extensions) { file <- file.path(tempdir(), paste0("test.", ext)) file.create(file) on.exit(unlink(file)) 
expect_error(import(file)) } }) test_that("import method exported by an external package", { extensions <- c("bean", "beancount", "ledger", "hledger") for (ext in extensions) { file <- file.path(tempdir(), paste0("test.", ext)) file.create(file) on.exit(unlink(file)) expect_error(import(file)) } }) rio/tests/testthat/test_format_csv.R0000644000176200001440000000462113435251357017405 0ustar liggesuserscontext("CSV imports/exports") require("datasets") test_that("Export to CSV", { expect_true(export(iris, "iris.csv") %in% dir()) unlink("iris.csv") }) test_that("Export (Append) to CSV", { export(iris, "iris.csv") nlines <- length(readLines("iris.csv")) export(iris, "iris.csv", append = FALSE) expect_true(identical(length(readLines("iris.csv")), nlines)) export(iris, "iris.csv", append = TRUE) expect_true(identical(length(readLines("iris.csv")), (2L*nlines)-1L)) unlink("iris.csv") }) test_that("Import from CSV", { noheadercsv <- import(system.file("examples", "noheader.csv", package = "rio"), header = FALSE) expect_that(colnames(noheadercsv)[1], equals("V1"), label = "Header is correctly specified") }) test_that("Import from (European-style) CSV with semicolon separator", { write.table(iris, "iris2.csv", dec = ",", sep = ";", row.names = FALSE) expect_true("iris2.csv" %in% dir()) # import works (even if column classes are incorrect) expect_true(is.data.frame(import("iris2.csv", fread = TRUE, header = TRUE))) iris_imported <- import("iris2.csv", format = ";", fread = TRUE, header = TRUE) # import works with correct, numeric column classes expect_true(is.data.frame(iris_imported)) expect_true(is.numeric(iris_imported[["Sepal.Length"]])) }) context("CSV (.csv2) imports/exports") test_that("Export to CSV", { expect_true(export(iris, "iris.csv", format = "csv2") %in% dir()) }) test_that("Import from CSV (read.csv)", { expect_true(is.data.frame(import("iris.csv", format = "csv2"))) }) test_that("Import from CSV (fread)", { expect_true(is.data.frame(import("iris.csv", format 
= "csv2", fread = TRUE))) }) test_that("Export to TSV with CSV extension", { expect_true(export(iris, "iris.csv", format = "tsv") %in% dir()) }) test_that("Import from TSV with CSV extension", { expect_true(ncol(import("iris.csv")) == 5L) expect_true(ncol(import("iris.csv", format = "tsv")) == 5L) expect_true(ncol(import("iris.csv", format = "tsv", sep = "\t")) == 5L) expect_true(ncol(import("iris.csv", sep = ",")) == 5L) # use `data.table::fread(sep = "auto")` even if `sep` set explicitly to "," expect_true(ncol(import("iris.csv", format = "csv")) == 5L) expect_true(ncol(import("iris.csv", sep = "auto")) == 5L) }) unlink("iris.csv") unlink("iris2.csv") rio/tests/testthat/test_compress.R0000644000176200001440000000214313106310011017044 0ustar liggesuserscontext("Compressed files") test_that("Recognize compressed file types", { expect_true(rio:::find_compress("file.zip")$compress == "zip") expect_true(rio:::find_compress("file.tar")$compress == "tar") expect_true(rio:::find_compress("file.tar.gz")$compress == "tar") expect_true(is.na(rio:::find_compress("file.gz")$compress)) expect_true(is.na(rio:::find_compress("file.notcompressed")$compress)) }) test_that("Export to compressed (zip)", { e1 <- export(iris, "iris.csv.zip") expect_true(e1 %in% dir()) }) test_that("Export to compressed (tar)", { e2 <- export(iris, "iris.csv.tar") expect_true(e2 %in% dir()) }) test_that("Import from compressed", { expect_true(is.data.frame(import("iris.csv.zip"))) expect_true(is.data.frame(import("iris.csv.zip", which = 1))) expect_true(is.data.frame(import("iris.csv.zip", which = "iris.csv"))) # tar export does not work due to: https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16716 #expect_true(is.data.frame(import("iris.csv.tar"))) }) unlink("iris.csv.zip") unlink("iris.csv.tar") rio/tests/testthat/test_format_fst.R0000644000176200001440000000051113535446305017400 0ustar liggesuserscontext("fst imports/exports") require("datasets") test_that("Export to fst", { 
skip_if_not_installed(pkg="fst") expect_true(export(iris, "iris.fst") %in% dir()) }) test_that("Import from fst", { skip_if_not_installed(pkg="fst") expect_true(is.data.frame(import("iris.fst"))) }) unlink("iris.fst") rio/tests/testthat/test_remote.R0000644000176200001440000000376214015023005016520 0ustar liggesuserscontext("Remote Files") test_that("Import Remote Stata File", { f <- try(import("http://www.stata-press.com/data/r13/auto.dta")) if (!inherits(f, "try-error")) { expect_true(is.data.frame(f)) } }) test_that("Import Remote GitHub File", { rfile <- "https://raw.githubusercontent.com/leeper/rio/master/inst/examples/no_header.csv" rfile_imported1 <- try(import(rfile)) if (!inherits(rfile_imported1, "try-error")) { expect_true(inherits(rfile_imported1, "data.frame"), label = "Import remote file (implied format)") } rfile_imported2 <- try(import(rfile, format = "csv")) if (!inherits(rfile_imported2, "try-error")) { expect_true(inherits(rfile_imported2, "data.frame"), label = "Import remote file (explicit format)") } lfile <- remote_to_local(rfile) if (!inherits(lfile, "try-error")) { expect_true(file.exists(lfile), label = "Remote file copied successfully") expect_true(inherits(import(lfile), "data.frame"), label = "Import local copy successfully") } }) test_that("Import Remote File from Shortened URL", { skip_if_not_installed(pkg="data.table") shorturl <- try(import("https://goo.gl/KPFiaK")) if (!inherits(shorturl, "try-error")) { expect_true(inherits(shorturl, "data.frame"), label = "Import remote file") } }) test_that("Import from Google Sheets", { googleurl1 <- "https://docs.google.com/spreadsheets/d/1I9mJsS5QnXF2TNNntTy-HrcdHmIF9wJ8ONYvEJTXSNo/edit#gid=0" expect_true(inherits(import(googleurl1), "data.frame"), label = "Import google sheets (specified sheet)") googleurl2 <- "https://docs.google.com/spreadsheets/d/1I9mJsS5QnXF2TNNntTy-HrcdHmIF9wJ8ONYvEJTXSNo/edit" expect_true(inherits(import(googleurl2), "data.frame"), label = "Import google sheets 
(unspecified sheet)") expect_true(inherits(import(googleurl1, format = "tsv"), "data.frame"), label = "Import google sheets (specified sheet, specified format)") }) rio/tests/testthat/test_format_tsv.R0000644000176200001440000000037712660644311017425 0ustar liggesuserscontext("TSV imports/exports") require("datasets") test_that("Export to TSV", { expect_true(export(iris, "iris.tsv") %in% dir()) }) test_that("Import from TSV", { expect_true(is.data.frame(import("iris.tsv"))) }) unlink("iris.tsv") rio/tests/testthat/test_format_csv_gz.R0000644000176200001440000000043313070375302020072 0ustar liggesuserscontext(".csv.gz imports/exports") require("datasets") test_that("Export to csv.gz", { expect_true(export(iris, "iris.csv.gz") %in% dir()) }) test_that("Import from csv.gz", { expect_true(inherits(import("iris.csv.gz"), "data.frame")) }) unlink("iris.csv.gz") rio/tests/testthat/test_format_fortran.R0000644000176200001440000000042013106305747020254 0ustar liggesuserscontext("Fortran imports/exports") test_that("Import from Fortran", { ff <- tempfile() cat(file = ff, "123456", "987654", sep = "\n") expect_true(inherits(import(ff, format = "fortran", style = c("F2.1","F2.0","I2")), "data.frame")) unlink(ff) }) rio/tests/testthat/test_format_rdata.R0000644000176200001440000000310613552621765017706 0ustar liggesuserscontext("Rdata imports/exports") require("datasets") test_that("Export to Rdata", { # data.frame expect_true(export(iris, "iris.Rdata") %in% dir()) # environment e <- new.env() e$iris <- iris expect_true(export(e, "iris.Rdata") %in% dir()) # character expect_true(export("iris", "iris.Rdata") %in% dir()) # expect error otherwise expect_error(export(iris$Species, "iris.Rdata") %in% dir()) }) test_that("Import from Rdata", { expect_true(is.data.frame(import("iris.Rdata"))) expect_true(is.data.frame(import("iris.Rdata", which = 1))) expect_warning(is.data.frame(import("iris.Rdata",which=1, verbose='ignored value', invalid_argument=42)), "File imported using load. 
Arguments to '...' ignored.", label="RData imports and ignores unused arguments with a warning") }) test_that("Export to rda", { expect_true(export(iris, "iris.rda") %in% dir()) }) test_that("Import from rda", { expect_true(is.data.frame(import("iris.rda"))) expect_true(is.data.frame(import("iris.rda", which = 1))) expect_warning(is.data.frame(import("iris.rda", which=1, verbose="ignored value", invalid_argument=42)), "File imported using load. Arguments to '...' ignored.", label="rda imports and ignores unused arguments with a warning") }) unlink("iris.Rdata") unlink("iris.rda") rio/tests/testthat/test_format_R.R0000644000176200001440000000061713577132026017012 0ustar liggesuserscontext("R dump imports/exports") require("datasets") test_that("Export to .R dump file", { expect_true(export(iris, "iris.R") %in% dir()) expect_true(export(iris, "iris.dump") %in% dir()) }) test_that("Import from .R dump file", { expect_true(is.data.frame(import("iris.R"))) expect_true(is.data.frame(import("iris.dump"))) }) unlink("iris.R") unlink("iris.dump") rio/tests/testthat/test_convert.R0000644000176200001440000000167513106304650016716 0ustar liggesuserscontext("Convert") library("datasets") export(mtcars, "mtcars.dta") test_that("Basic file conversion", { convert("mtcars.dta", "mtcars.csv") convert("mtcars.csv", "mtcars.dta") x <- import("mtcars.dta") expect_true(identical(names(mtcars), names(x))) expect_true(identical(dim(mtcars), dim(x))) unlink("mtcars.csv") }) test_that("File conversion with arguments", { export(mtcars, "mtcars.csv", format = "tsv") convert("mtcars.csv", "mtcars.csv", in_opts = list(format = "tsv")) expect_true("mtcars.csv" %in% dir()) expect_true(!("mtcars.tsv" %in% dir())) convert("mtcars.csv", "mtcars.tsv", in_opts = list(format = "tsv"), out_opts = list(format = "csv")) expect_true("mtcars.tsv" %in% dir()) unlink("mtcars.csv") unlink("mtcars.tsv") }) test_that("File conversion w/o out_file errors", { expect_error(convert("mtcars.dta")) }) 
unlink("mtcars.dta") rio/tests/testthat/test_format_json.R0000644000176200001440000000116713577132026017563 0ustar liggesuserscontext("JSON imports/exports") require("datasets") test_that("Export to JSON", { skip_if_not_installed("jsonlite") expect_true(export(iris, "iris.json") %in% dir()) }) test_that("Import from JSON", { skip_if_not_installed("jsonlite") expect_true(is.data.frame(import("iris.json"))) }) test_that("Export to JSON (non-data frame)", { skip_if_not_installed("jsonlite") expect_true(export(list(1:10, letters), "list.json") %in% dir()) expect_true(inherits(import("list.json"), "list")) expect_true(length(import("list.json")) == 2L) }) unlink("iris.json") unlink("list.json") rio/tests/testthat/test_format_csvy.R0000644000176200001440000000051613600363336017570 0ustar liggesuserscontext("CSVY imports/exports") require("datasets") tmp <- tempfile(fileext = ".csvy") test_that("Export to CSVY", { suppressWarnings(expect_true(file.exists(export(iris, tmp)))) }) test_that("Import from CSVY", { suppressWarnings(d <- import(tmp)) expect_true(inherits(d, "data.frame")) }) unlink(tmp) rio/tests/testthat/test_format_mtp.R0000644000176200001440000000015112660645733017410 0ustar liggesuserscontext("Minitab (.mtp) imports/exports") require("datasets") #test_that("Import from Minitab", {}) rio/tests/testthat/test_format_rec.R0000644000176200001440000000015112660645755017365 0ustar liggesuserscontext("Epiinfo (.rec) imports/exports") require("datasets") #test_that("Import from Epiinfo", {}) rio/vignettes/0000755000176200001440000000000014142223745013057 5ustar liggesusersrio/vignettes/rio.Rmd0000644000176200001440000004375514017251410014321 0ustar liggesusers--- title: "Import, Export, and Convert Data Files" date: "`r Sys.Date()`" output: html_document: fig_caption: false toc: true toc_float: collapsed: false smooth_scroll: false toc_depth: 3 vignette: > %\VignetteIndexEntry{Introduction to 'rio'} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- # 
Import, Export, and Convert Data Files The idea behind **rio** is to simplify the process of importing data into R and exporting data from R. This process is, probably unnecessarily, extremely complex for beginning R users. Indeed, R supplies [an entire manual](https://cran.r-project.org/doc/manuals/r-release/R-data.html) describing the process of data import/export. And, despite all of that text, most of the packages described are (to varying degrees) out-of-date. Faster, simpler, packages with fewer dependencies have been created for many of the file types described in that document. **rio** aims to unify data I/O (importing and exporting) into two simple functions: `import()` and `export()` so that beginners (and experienced R users) never have to think twice (or even once) about the best way to read and write R data. The core advantage of **rio** is that it makes assumptions that the user is probably willing to make. Specifically, **rio** uses the file extension of a file name to determine what kind of file it is. This is the same logic used by Windows OS, for example, in determining what application is associated with a given file type. By taking away the need to manually match a file type (which a beginner may not recognize) to a particular import or export function, **rio** allows almost all common data formats to be read with the same function. By making import and export easy, it's an obvious next step to also use R as a simple data conversion utility. Transferring data files between various proprietary formats is always a pain and often expensive. The `convert` function therefore combines `import` and `export` to easily convert between file formats (thus providing a FOSS replacement for programs like [Stat/Transfer](https://stattransfer.com/) or [Sledgehammer](https://www.mtna.us/#/products/sledgehammer)). ## Supported file formats **rio** supports a variety of different file formats for import and export. 
To keep the package slim, all non-essential formats are supported via "Suggests" packages, which are not installed (or loaded) by default. To ensure rio is fully functional, install these packages the first time you use **rio** via: ```R install_formats() ``` The full list of supported formats is below: | Format | Typical Extension | Import Package | Export Package | Installed by Default | | ------ | --------- | -------------- | -------------- | -------------------- | | Comma-separated data | .csv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | Pipe-separated data | .psv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | Tab-separated data | .tsv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | CSVY (CSV + YAML metadata header) | .csvy | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | | SAS | .sas7bdat | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SPSS | .sav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SPSS (compressed) | .zsav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | Stata | .dta | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SAS XPORT | .xpt | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | | SPSS Portable | .por | [**haven**](https://cran.r-project.org/package=haven) | | Yes | | Excel | .xls | 
[**readxl**](https://cran.r-project.org/package=readxl) | | Yes | | Excel | .xlsx | [**readxl**](https://cran.r-project.org/package=readxl) | [**openxlsx**](https://cran.r-project.org/package=openxlsx) | Yes | | R syntax | .R | **base** | **base** | Yes | | Saved R objects | .RData, .rda | **base** | **base** | Yes | | Serialized R objects | .rds | **base** | **base** | Yes | | Epiinfo | .rec | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | | Minitab | .mtp | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | | Systat | .syd | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | | "XBASE" database files | .dbf | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | | Weka Attribute-Relation File Format | .arff | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | | Data Interchange Format | .dif | **utils** | | Yes | | Fortran data | no recognized extension | **utils** | | Yes | | Fixed-width format data | .fwf | **utils** | **utils** | Yes | | gzip comma-separated data | .csv.gz | **utils** | **utils** | Yes | | Apache Arrow (Parquet) | .parquet | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | | EViews | .wf1 | [**hexView**](https://cran.r-project.org/package=hexView) | | No | | Feather R/Python interchange format | .feather | [**feather**](https://cran.r-project.org/package=feather) | [**feather**](https://cran.r-project.org/package=feather) | No | | Fast Storage | .fst | [**fst**](https://cran.r-project.org/package=fst) | [**fst**](https://cran.r-project.org/package=fst) | No | | JSON | .json | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | No | | Matlab | .mat | [**rmatio**](https://cran.r-project.org/package=rmatio) | 
[**rmatio**](https://cran.r-project.org/package=rmatio) | No | | OpenDocument Spreadsheet | .ods | [**readODS**](https://cran.r-project.org/package=readODS) | [**readODS**](https://cran.r-project.org/package=readODS) | No | | HTML Tables | .html | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | | Shallow XML documents | .xml | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | | YAML | .yml | [**yaml**](https://cran.r-project.org/package=yaml) | [**yaml**](https://cran.r-project.org/package=yaml) | No | | Clipboard | default is tsv | [**clipr**](https://cran.r-project.org/package=clipr) | [**clipr**](https://cran.r-project.org/package=clipr) | No | | [Google Sheets](https://www.google.com/sheets/about/) | as Comma-separated data | | | | Additionally, any format that is not supported by **rio** but that has a known R implementation will produce an informative error message pointing to a package and import or export function. Unrecognized formats will yield a simple "Unrecognized file format" error. ## Data Import **rio** allows you to import files in almost any format using one, typically single-argument, function. `import()` infers the file format from the file's extension and calls the appropriate data import function for you, returning a simple data.frame. This works for any for the formats listed above. 
```{r, echo=FALSE, results='hide'} library("rio") export(mtcars, "mtcars.csv") export(mtcars, "mtcars.rds") export(mtcars, "mtcars.dta") export(mtcars, "mtcars_noext", format = "csv") ``` ```{r} library("rio") x <- import("mtcars.csv") y <- import("mtcars.rds") z <- import("mtcars.dta") # confirm identical all.equal(x, y, check.attributes = FALSE) all.equal(x, z, check.attributes = FALSE) ``` If for some reason a file does not have an extension, or has a file extension that does not match its actual type, you can manually specify a file format to override the format inference step. For example, we can read in a CSV file that does not have a file extension by specifying `csv`: ```{r} head(import("mtcars_noext", format = "csv")) ``` ```{r, echo=FALSE, results='hide'} unlink("mtcars.csv") unlink("mtcars.rds") unlink("mtcars.dta") unlink("mtcars_noext") ``` ### Importing Data Lists Sometimes you may have multiple data files that you want to import. `import()` only ever returns a single data frame, but `import_list()` can be used to import a vector of file names into R. This works even if the files are different formats: ```r str(import_list(dir()), 1) ``` Similarly, some single-file formats (e.g. Excel Workbooks, Zip directories, HTML files, etc.) can contain multiple data sets. Because `import()` is type safe, always returning a data frame, importing from these formats requires specifying a `which` argument to `import()` to dictate which data set (worksheet, file, table, etc.) to import (the default being `which = 1`). But `import_list()` can be used to import all (or only a specified subset, again via `which`) of data objects from these types of files. 
## Data Export The export capabilities of **rio** are somewhat more limited than the import capabilities, given the availability of different functions in various R packages and because import functions are often written to make use of data from other applications and it never seems to be a development priority to have functions to export to the formats used by other applications. That said, **rio** currently supports the following formats: ```{r} library("rio") export(mtcars, "mtcars.csv") export(mtcars, "mtcars.rds") export(mtcars, "mtcars.dta") ``` It is also easy to use `export()` as part of an R pipeline (from magrittr or dplyr). For example, the following code uses `export()` to save the results of a simple data transformation: ```{r} library("magrittr") mtcars %>% subset(hp > 100) %>% aggregate(. ~ cyl + am, data = ., FUN = mean) %>% export(file = "mtcars2.dta") ``` Some file formats (e.g., Excel workbooks, Rdata files) can support multiple data objects in a single file. `export()` natively supports output of multiple objects to these types of files: ```{r} # export to sheets of an Excel workbook export(list(mtcars = mtcars, iris = iris), "multi.xlsx") ``` ```{r} # export to an .Rdata file ## as a named list export(list(mtcars = mtcars, iris = iris), "multi.rdata") ## as a character vector export(c("mtcars", "iris"), "multi.rdata") ``` It is also possible to use the new (as of v0.6.0) function `export_list()` to write a list of data frames to multiple files using either a vector of file names or a file pattern: ```{r} export_list(list(mtcars = mtcars, iris = iris), "%s.tsv") ``` ## File Conversion The `convert()` function links `import()` and `export()` by constructing a dataframe from the imported file and immediately writing it back to disk. `convert()` invisibly returns the file name of the exported file, so that it can be used to programmatically access the new file. 
Because `convert()` is just a thin wrapper for `import()` and `export()`, it is very easy to use. For example, we can convert ```{r} # create file to convert export(mtcars, "mtcars.dta") # convert Stata to SPSS convert("mtcars.dta", "mtcars.sav") ``` `convert()` also accepts lists of arguments for controlling import (`in_opts`) and export (`out_opts`). This can be useful for passing additional arguments to import or export methods. This could be useful, for example, for reading in a fixed-width format file and converting it to a comma-separated values file: ```{r} # create an ambiguous file fwf <- tempfile(fileext = ".fwf") cat(file = fwf, "123456", "987654", sep = "\n") # see two ways to read in the file identical(import(fwf, widths = c(1,2,3)), import(fwf, widths = c(1,-2,3))) # convert to CSV convert(fwf, "fwf.csv", in_opts = list(widths = c(1,2,3))) import("fwf.csv") # check conversion ``` ```{r, echo=FALSE, results='hide'} unlink("mtcars.dta") unlink("mtcars.sav") unlink("fwf.csv") unlink(fwf) ``` With metadata-rich file formats (e.g., Stata, SPSS, SAS), it can also be useful to pass imported data through `characterize()` or `factorize()` when converting to an open, text-delimited format: `characterize()` converts a single variable or all variables in a data frame that have "labels" attributes into character vectors based on the mapping of values to value labels (e.g., `export(characterize(import("file.dta")), "file.csv")`). An alternative approach is exporting to CSVY format, which records metadata in a YAML-formatted header at the beginning of a CSV file. It is also possible to use **rio** on the command-line by calling `Rscript` with the `-e` (expression) argument. For example, to convert a file from Stata (.dta) to comma-separated values (.csv), simply do the following: ``` Rscript -e "rio::convert('mtcars.dta', 'mtcars.csv')" ``` ## Package Philosophy The core advantage of **rio** is that it makes assumptions that the user is probably willing to make. 
Eight of these are important: 1. **rio** uses the file extension of a file name to determine what kind of file it is. This is the same logic used by Windows OS, for example, in determining what application is associated with a given file type. By removing the need to manually match a file type (which a beginner may not recognize) to a particular import or export function, **rio** allows almost all common data formats to be read with the same function. And if a file extension is incorrect, users can force a particular import method by specifying the `format` argument. Other packages do this as well, but **rio** aims to be more complete and more consistent than each: - [**reader**](https://cran.r-project.org/package=reader) handles certain text formats and R binary files - [**io**](https://cran.r-project.org/package=io) offers a set of custom formats - [**ImportExport**](https://cran.r-project.org/package=ImportExport) focuses on select binary formats (Excel, SPSS, and Access files) and provides a Shiny interface. - [**SchemaOnRead**](https://cran.r-project.org/package=SchemaOnRead) iterates through a large number of possible import methods until one works successfully 2. **rio** uses `data.table::fread()` for text-delimited files to automatically determine the file format regardless of the extension. So, a CSV that is actually tab-separated will still be correctly imported. It's also crazy fast. 3. **rio**, wherever possible, does not import character strings as factors. 4. **rio** supports web-based imports natively, including from SSL (HTTPS) URLs, from shortened URLs, from URLs that lack proper extensions, and from (public) Google Documents Spreadsheets. 5. **rio** imports from single-file .zip and .tar archives automatically, without the need to explicitly decompress them. Export to compressed directories is also supported. 6. **rio** wraps a variety of faster, more streamlined I/O packages than those provided by base R or the **foreign** package. 
It uses [**data.table**](https://cran.r-project.org/package=data.table) for delimited formats, [**haven**](https://cran.r-project.org/package=haven) for SAS, Stata, and SPSS files, smarter and faster fixed-width file import and export routines, and [**readxl**](https://cran.r-project.org/package=readxl) and [**openxlsx**](https://cran.r-project.org/package=openxlsx) for reading and writing Excel workbooks. 7. **rio** stores metadata from rich file formats (SPSS, Stata, etc.) in variable-level attributes in a consistent form regardless of file type or underlying import function. These attributes are identified as: - `label`: a description of the variable - `labels`: a vector mapping numeric values to character strings those values represent - `format`: a character string describing the variable storage type in the original file The `gather_attrs()` function makes it easy to move variable-level attributes to the data frame level (and `spread_attrs()` reverses that gathering process). These can be useful, especially, during file conversion to more easily modify attributes that are handled differently across file formats. As an example, the following idiom can be used to trim SPSS value labels to the 32-character maximum allowed by Stata: ```R dat <- gather_attrs(rio::import("data.sav")) attr(dat, "labels") <- lapply(attributes(dat)$labels, function(x) { if (!is.null(x)) { names(x) <- substring(names(x), 1, 32) } x }) export(spread_attrs(dat), "data.dta") ``` In addition, two functions (added in v0.5.5) provide easy ways to create character and factor variables from these "labels" attributes. `characterize()` converts a single variable or all variables in a data frame that have "labels" attributes into character vectors based on the mapping of values to value labels. `factorize()` does the same but returns factor variables. This can be especially helpful for converting these rich file formats into open formats (e.g., `export(characterize(import("file.dta")), "file.csv")`). 
8. **rio** imports and exports files based on an internal S3 class infrastructure. This means that other packages can contain extensions to **rio** by registering S3 methods. These methods should take the form `.import.rio_X()` and `.export.rio_X()`, where `X` is the file extension of a file type. An example is provided in the [rio.db package](https://github.com/leeper/rio.db). ```{r, echo=FALSE, results='hide'} unlink("mtcars.csv") unlink("mtcars.rds") unlink("mtcars.rdata") unlink("mtcars.dta") unlink("multi.xlsx") unlink("multi.rdata") unlink("mtcars2.dta") unlink("mtcars.tsv") unlink("iris.tsv") ``` rio/R/0000755000176200001440000000000014141525702011245 5ustar liggesusersrio/R/import_list.R0000644000176200001440000001575714135741516013762 0ustar liggesusers#' @title Import list of data frames #' @description Use \code{\link{import}} to import a list of data frames from a vector of file names or from a multi-object file (Excel workbook, .Rdata file, zip directory, or HTML file) #' @param file A character string containing a single file name for a multi-object file (e.g., Excel workbook, zip directory, or HTML file), or a vector of file paths for multiple files to be imported. #' @template setclass #' @param which If \code{file} is a single file path, this specifies which objects should be extracted (passed to \code{\link{import}}'s \code{which} argument). Ignored otherwise. #' @param rbind A logical indicating whether to pass the import list of data frames through \code{\link[data.table]{rbindlist}}. #' @param rbind_label If \code{rbind = TRUE}, a character string specifying the name of a column to add to the data frame indicating its source file. #' @param rbind_fill If \code{rbind = TRUE}, a logical indicating whether to set the \code{fill = TRUE} (and fill missing columns with \code{NA}). #' @param \dots Additional arguments passed to \code{\link{import}}. Behavior may be unexpected if files are of different formats. 
#' @return If \code{rbind=FALSE} (the default), a list of a data frames. Otherwise, that list is passed to \code{\link[data.table]{rbindlist}} with \code{fill = TRUE} and returns a data frame object of class set by the \code{setclass} argument; if this operation fails, the list is returned. #' @examples #' library('datasets') #' export(list(mtcars1 = mtcars[1:10,], #' mtcars2 = mtcars[11:20,], #' mtcars3 = mtcars[21:32,]), #' xlsx_file <- tempfile(fileext = ".xlsx") #' ) #' #' # import a single file from multi-object workbook #' str(import(xlsx_file, which = "mtcars1")) #' #' # import all worksheets #' str(import_list(xlsx_file), 1) #' #' # import and rbind all worksheets #' mtcars2 <- import_list(xlsx_file, rbind = TRUE) #' all.equal(mtcars2[,-12], mtcars, check.attributes = FALSE) #' #' # import multiple files #' wd <- getwd() #' setwd(tempdir()) #' export(mtcars, "mtcars1.csv") #' export(mtcars, "mtcars2.csv") #' str(import_list(dir(pattern = "csv$")), 1) #' unlink(c("mtcars1.csv", "mtcars2.csv")) #' setwd(wd) #' #' # cleanup #' unlink(xlsx_file) #' #' @seealso \code{\link{import}}, \code{\link{export_list}}, \code{\link{export}} #' @export import_list <- function(file, setclass, which, rbind = FALSE, rbind_label = "_file", rbind_fill = TRUE, ...) 
{ if (missing(setclass)) { setclass <- NULL } strip_exts <- function(file) { sapply(file, function(x) tools::file_path_sans_ext(basename(x))) } if (length(file) > 1) { names(file) <- strip_exts(file) x <- lapply(file, function(thisfile) { out <- try(import(thisfile, setclass = setclass, ...), silent = TRUE) if (inherits(out, "try-error")) { warning(sprintf("Import failed for %s", thisfile)) out <- NULL } else if (isTRUE(rbind)) { out[[rbind_label]] <- thisfile } structure(out, filename = thisfile) }) names(x) <- names(file) } else { if (get_ext(file) == "rdata") { e <- new.env() load(file, envir = e) x <- as.list(e) } else { if (get_ext(file) == "html") { requireNamespace("xml2", quietly = TRUE) tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table") if (missing(which)) { which <- seq_along(tables) } whichnames <- sapply(xml2::xml_attrs(tables[which]), function(x) if ("class" %in% names(x)) x["class"] else "" ) names(which) <- whichnames } else if (get_ext(file) %in% c("xls","xlsx")) { requireNamespace("readxl", quietly = TRUE) whichnames <- readxl::excel_sheets(path = file) if (missing(which)) { which <- seq_along(whichnames) names(which) <- whichnames } else if (is.character(which)) { whichnames <- which } else { whichnames <- whichnames[which] } } else if (get_ext(file) %in% c("zip")) { if (missing(which)) { whichnames <- utils::unzip(file, list = TRUE)[, "Name"] which <- seq_along(whichnames) names(which) <- strip_exts(whichnames) } else if (is.character(which)) { whichnames <- utils::unzip(file, list = TRUE)[, "Name"] whichnames <- whichnames[whichnames %in% which] } else { whichnames <- utils::unzip(file, list = TRUE)[, "Name"] names(which) <- strip_exts(whichnames) } } else { which <- 1 whichnames <- NULL } x <- lapply(which, function(thiswhich) { out <- try(import(file, setclass = setclass, which = thiswhich, ...), silent = TRUE) if (inherits(out, "try-error")) { warning(sprintf("Import failed for %s from %s", thiswhich, file)) out <- NULL 
} else if (isTRUE(rbind) && length(which) > 1) { out[[rbind_label]] <- thiswhich } out }) names(x) <- whichnames } } # optionally rbind if (isTRUE(rbind)) { if (length(x) == 1) { x <- x[[1L]] } else { x2 <- try(data.table::rbindlist(x, fill = rbind_fill), silent = TRUE) if (inherits(x2, "try-error")) { warning("Attempt to rbindlist() the data did not succeed. List returned instead.") return(x) } else { x <- x2 } } # set class a <- list(...) if (is.null(setclass)) { if ("data.table" %in% names(a) && isTRUE(a[["data.table"]])) { x <- set_class(x, class = "data.table") } else { x <- set_class(x, class = "data.frame") } } else { if ("data.table" %in% names(a) && isTRUE(a[["data.table"]])) { if (setclass != "data.table") { warning(sprintf("'data.table = TRUE' argument overruled. Using setclass = '%s'", setclass)) x <- set_class(x, class = setclass) } else { x <- set_class(x, class = "data.table") } } else { x <- set_class(x, class = setclass) } } } return(x) } rio/R/export_methods.R0000644000176200001440000002603614141720703014441 0ustar liggesusers#' @importFrom data.table fwrite #' @importFrom utils write.table export_delim <- function(file, x, fwrite = TRUE, sep = "\t", row.names = FALSE, col.names = TRUE, append = FALSE, ...) { if (isTRUE(fwrite) & !inherits(file, "connection")) { if (isTRUE(append)) { data.table::fwrite(x, file = file, sep = sep, row.names = row.names, col.names = FALSE, append = TRUE, ...) } else { data.table::fwrite(x, file = file, sep = sep, row.names = row.names, col.names = col.names, append = FALSE, ...) } } else { if (isTRUE(fwrite) & inherits(file, "connection")) { message("data.table::fwrite() does not support writing to connections. Using utils::write.table() instead.") } if (isTRUE(append)) { write.table(x, file = file, sep = sep, row.names = row.names, col.names = FALSE, append = TRUE, ...) } else { write.table(x, file = file, sep = sep, row.names = row.names, col.names = col.names, append = FALSE, ...) 
} } } #' @export .export.rio_txt <- function(file, x, ...) { export_delim(x = x, file = file, ...) } #' @export .export.rio_tsv <- function(file, x, ...) { export_delim(x = x, file = file, ...) } #' @export .export.rio_csv <- function(file, x, sep = ",", dec = ".", ...) { export_delim(x = x, file = file, sep = sep, dec = dec, ...) } #' @export .export.rio_csv2 <- function(file, x, sep = ";", dec = ",", ...) { export_delim(x = x, file = file, sep = sep, dec = dec, ...) } #' @export .export.rio_csvy <- function(file, x, sep = ",", dec = ".", yaml = TRUE, ...) { export_delim(x = x, file = file, sep = sep, dec = dec, yaml = TRUE, ...) } #' @export .export.rio_psv <- function(file, x, ...) { export_delim(x = x, file = file, sep = "|", ...) } #' @importFrom utils capture.output write.csv #' @export .export.rio_fwf <- function(file, x, verbose = getOption("verbose", FALSE), sep = "", row.names = FALSE, quote = FALSE, col.names = FALSE, digits = getOption("digits", 7), ...) { dat <- lapply(x, function(col) { if (is.character(col)) { col <- as.numeric(as.factor(col)) } else if(is.factor(col)) { col <- as.integer(col) } if (is.integer(col)) { return(sprintf("%i",col)) } if (is.numeric(col)) { decimals <- strsplit(as.character(col), ".", fixed = TRUE) m1 <- max(nchar(unlist(lapply(decimals, `[`, 1))), na.rm = TRUE) decimals_2 <- unlist(lapply(decimals, `[`, 2)) decimals_2_nchar <- nchar(decimals_2[!is.na(decimals_2)]) if (length(decimals_2_nchar)) { m2 <- max(decimals_2_nchar, na.rm = TRUE) } else { m2 <- 0 } if (!is.finite(m2)) { m2 <- digits } return(formatC(sprintf(fmt = paste0("%0.",m2,"f"), col), width = (m1+m2+1))) } else if(is.logical(col)) { return(sprintf("%i",col)) } }) dat <- do.call(cbind, dat) n <- nchar(dat[1,]) + c(rep(nchar(sep), ncol(dat)-1), 0) col_classes <- sapply(x, class) col_classes[col_classes == "factor"] <- "integer" dict <- cbind.data.frame(variable = names(n), class = col_classes, width = unname(n), columns = paste0(c(1, cumsum(n)+1)[-length(n)], 
"-", cumsum(n)), stringsAsFactors = FALSE) if (isTRUE(verbose)) { message("Columns:") message(paste0(capture.output(dict), collapse = "\n")) if (sep == "") { message(paste0('\nRead in with:\n', 'import("', file, '",\n', ' widths = c(', paste0(n, collapse = ","), '),\n', ' col.names = c("', paste0(names(n), collapse = '","'), '"),\n', ' colClasses = c("', paste0(col_classes, collapse = '","') ,'"))\n'), domain = NA) } } cat(paste0("#", capture.output(write.csv(dict, row.names = FALSE, quote = FALSE))), file = file, sep = "\n") utils::write.table(dat, file = file, append = TRUE, row.names = row.names, sep = sep, quote = quote, col.names = col.names, ...) } #' @export .export.rio_r <- function(file, x, ...) { dput(x, file = file, ...) } #' @export .export.rio_dump <- function(file, x, ...) { dump(as.character(substitute(x)), file = file, ...) } #' @export .export.rio_rds <- function(file, x, ...) { saveRDS(object = x, file = file, ...) } #' @export .export.rio_rdata <- function(file, x, ...) { if (is.data.frame(x)) { return(save(x, file = file, ...)) } else if (is.list(x)) { e <- as.environment(x) save(list = names(x), file = file, envir = e, ...) } else if (is.environment(x)) { save(list = ls(x), file = file, envir = x, ...) } else if (is.character(x)) { save(list = x, file = file, ...) } else { stop("'x' must be a data.frame, list, or environment") } } #' @export .export.rio_rda <- .export.rio_rdata #' @export .export.rio_feather <- function(file, x, ...) { requireNamespace("feather") feather::write_feather(x = x, path = file) } #' @export .export.rio_fst <- function(file, x, ...) { requireNamespace("fst") fst::write.fst(x = x, path = file, ...) } #' @export .export.rio_matlab <- function(file, x, ...) { requireNamespace("rmatio") rmatio::write.mat(object = x, filename = file, ...) } #' @importFrom haven write_sav #' @export .export.rio_sav <- function(file, x, ...) { x <- restore_labelled(x) haven::write_sav(data = x, path = file, ...) 
} #' @importFrom haven write_sav #' @export .export.rio_zsav <- function(file, x, compress = TRUE, ...) { x <- restore_labelled(x) haven::write_sav(data = x, path = file, compress = compress, ...) } #' @importFrom haven write_dta #' @export .export.rio_dta <- function(file, x, ...) { x <- restore_labelled(x) haven::write_dta(data = x, path = file, ...) } #' @importFrom haven write_sas #' @export .export.rio_sas7bdat <- function(file, x, ...) { x <- restore_labelled(x) haven::write_sas(data = x, path = file, ...) } #' @importFrom haven write_xpt #' @export .export.rio_xpt <- function(file, x, ...) { x <- restore_labelled(x) haven::write_xpt(data = x, path = file, ...) } #' @importFrom foreign write.dbf #' @export .export.rio_dbf <- function(file, x, ...) { foreign::write.dbf(dataframe = x, file = file, ...) } #' @export .export.rio_json <- function(file, x, ...) { requireNamespace("jsonlite") cat(jsonlite::toJSON(x, ...), file = file) } #' @importFrom foreign write.arff #' @export .export.rio_arff <- function(file, x, ...) { foreign::write.arff(x = x, file = file, ...) } #' @importFrom openxlsx write.xlsx #' @export .export.rio_xlsx <- function(file, x, which, ...) { dots <- list(...) if (!missing(which)) { if (file.exists(file)) { wb <- openxlsx::loadWorkbook(file = file) sheets <- openxlsx::getSheetNames(file = file) if (is.numeric(which)) { if (which <= length(sheets)) { which <- sheets[which] } else { which <- paste("Sheet", length(sheets) + 1L) } } if (!which %in% sheets) { openxlsx::addWorksheet(wb, sheet = which) } else { openxlsx::removeWorksheet(wb, sheet = which) openxlsx::addWorksheet(wb, sheet = which) openxlsx::worksheetOrder(wb) <- sheets } openxlsx::writeData(wb, sheet = which, x = x) openxlsx::saveWorkbook(wb, file = file, overwrite = TRUE) } else { openxlsx::write.xlsx(x = x, file = file, sheetName = which, ...) } } else { openxlsx::write.xlsx(x = x, file = file, ...) } } #' @export .export.rio_ods <- function(file, x, ...) 
#' @export
.export.rio_html <- function(file, x, ...) {
    requireNamespace("xml2")
    # Minimal HTML skeleton; the original string literal had its markup tags
    # stripped (it read "\nR Exported Data\n\n\n"), so read_html() received
    # malformed input. Reconstructed so that the second child of <html> is
    # <body>, which is what the [[2]] indexing below relies on.
    html <- xml2::read_html("<html>\n<head><title>R Exported Data</title></head>\n<body>\n</body>\n</html>")
    bod <- xml2::xml_children(html)[[2]]
    # a single data frame becomes a one-table document
    if (is.data.frame(x)) {
        x <- list(x)
    }
    for (i in seq_along(x)) {
        x[[i]][] <- lapply(x[[i]], as.character)
        # escape ampersands; the garbled gsub('&','&',v) was a no-op and left
        # the generated markup ill-formed whenever the data contained "&"
        x[[i]][] <- lapply(x[[i]], function(v) gsub("&", "&amp;", v))
        names(x[[i]]) <- gsub("&", "&amp;", names(x[[i]]))
        tab <- xml2::xml_add_child(bod, "table")
        # add header row
        invisible(xml2::xml_add_child(tab, xml2::read_xml(paste0(twrap(paste0(twrap(names(x[[i]]), "th"), collapse = ""), "tr"), "\n"))))
        # add one <tr> of <td> cells per data row
        for (j in seq_len(nrow(x[[i]]))) {
            xml2::xml_add_child(tab, xml2::read_xml(paste0(twrap(paste0(twrap(unlist(x[[i]][j, , drop = TRUE]), "td"), collapse = ""), "tr"), "\n")))
        }
    }
    xml2::write_xml(html, file = file, ...)
}

#' @export
.export.rio_xml <- function(file, x, ...) {
    requireNamespace("xml2")
    # Root node is named after the exported object. The original string was
    # missing the closing root tag (garbled to paste0("<", ..., ">\n\n")),
    # which is not well-formed XML.
    root <- as.character(substitute(x))
    xml <- xml2::read_xml(paste0("<", root, ">\n</", root, ">\n"))
    # carry data-frame-level attributes over as attributes of the root node
    att <- attributes(x)[!names(attributes(x)) %in% c("names", "row.names", "class")]
    for (a in seq_along(att)) {
        xml2::xml_attr(xml, names(att)[a]) <- att[[a]]
    }
    # remove/escape characters that are illegal in XML content and node names
    row.names(x) <- gsub("&", "&amp;", row.names(x))
    colnames(x) <- gsub("[ &]", ".", colnames(x))
    x[] <- lapply(x, function(v) gsub("&", "&amp;", v))
    # add data: one <Observation> per row, one child node per column
    for (i in seq_len(nrow(x))) {
        thisrow <- xml2::xml_add_child(xml, "Observation")
        xml2::xml_attr(thisrow, "row.name") <- row.names(x)[i]
        for (j in seq_along(x)) {
            xml2::xml_add_child(thisrow, xml2::read_xml(paste0(twrap(x[i, j, drop = TRUE], names(x)[j]), "\n")))
        }
    }
    xml2::write_xml(xml, file = file, ...)
}
# Infer the compression type of an output path from its file extension.
#
# Args:
#   f: a file path such as "mtcars.csv.zip" or "mtcars.csv.tar.gz".
# Returns:
#   A list with two elements:
#     file     - the path with any compression extension stripped
#     compress - "zip", "tar", or NA_character_ when not compressed
find_compress <- function(f) {
    # Anchor each pattern on a literal dot: the unanchored originals ("zip$",
    # "tar$", "tar\\.gz$") misclassified names such as "file.gzip" (matched
    # "zip$") or "guitar" (matched "tar$") and then failed to strip the
    # extension, returning an inconsistent file/compress pair.
    if (grepl("\\.zip$", f)) {
        file <- sub("\\.zip$", "", f)
        compress <- "zip"
    } else if (grepl("\\.tar\\.gz$", f)) {
        file <- sub("\\.tar\\.gz$", "", f)
        compress <- "tar"
    } else if (grepl("\\.tar$", f)) {
        file <- sub("\\.tar$", "", f)
        compress <- "tar"
    } else {
        file <- f
        compress <- NA_character_
    }
    return(list(file = file, compress = compress))
}
# Extract a single member from a .tar archive into a temporary directory and
# return the path to the extracted file.
#
# Args:
#   file:  path to the tar archive.
#   which: either a numeric index into the archive's member list, or a
#          character (regex) pattern matched against member names. Defaults to
#          the first member, with a warning when the archive holds several.
# Returns:
#   The filesystem path of the extracted member. The temporary directory is
#   deliberately NOT deleted here: the caller still has to read the file.
#   (The original on.exit(unlink(d)) was a silent no-op — unlink() without
#   recursive = TRUE does not remove directories — and would have deleted the
#   extracted file before the caller could import it had it worked.)
parse_tar <- function(file, which, ...) {
    d <- tempfile()
    dir.create(d)
    file_list <- utils::untar(file, list = TRUE)
    if (missing(which)) {
        which <- 1
        if (length(file_list) > 1) {
            warning("Tar archive contains multiple files. Attempting first file.")
        }
    }
    if (is.numeric(which)) {
        utils::untar(file, files = file_list[which], exdir = d)
        file.path(d, file_list[which])
    } else {
        # Anchor the pattern unless the caller already supplied one. The
        # original only assigned which2 inside the if-branch, so a pattern
        # already starting with "^" crashed with "object 'which2' not found".
        which2 <- if (substring(which, 1, 1) == "^") which else paste0("^", which)
        # Return the member name that actually matched, not the raw pattern:
        # file.path(d, which) pointed at a non-existent file whenever the
        # pattern differed from the member name.
        target <- file_list[grep(which2, file_list)[1]]
        utils::untar(file, files = target, exdir = d)
        file.path(d, target)
    }
}
# Wrap a value in a paired HTML/XML tag: twrap("x", "td") -> "<td>x</td>".
# Used by the HTML and XML export methods to build table rows and cells.
# The closing tag had been garbled away (paste0("<", tag, ">", value, "")),
# which produced ill-formed markup that xml2::read_xml() rejects; restore it.
twrap <- function(value, tag) {
    paste0("<", tag, ">", value, "</", tag, ">")
}
#' @title Determine whether a file is \dQuote{plain-text} or some sort of binary format
#'
#' @param file Path to the file
#' @param maxsize Maximum number of bytes to read
#' @param text_bytes Which characters are used by normal text (though not
#'                   necessarily just ASCII). To detect just ASCII, the
#'                   following value can be used:
#'                   \code{as.raw(c(7:16, 18, 19, 32:127))}
#'
#' @return A logical: \code{TRUE} when every byte read belongs to \code{text_bytes}
#' @export
#' @examples
#' library(datasets)
#' export(iris, yml_file <- tempfile(fileext = ".yml"))
#' is_file_text(yml_file) # TRUE
#'
#' export(iris, sav_file <- tempfile(fileext = ".sav"))
#' is_file_text(sav_file) # FALSE
#'
#' # cleanup
#' unlink(yml_file)
#' unlink(sav_file)
#'
is_file_text <- function(
    file,
    maxsize = Inf,
    text_bytes = as.raw(c(0x7:0x10, 0x12, 0x13, 0x20:0xFF))
) {
    ff <- file(file, "rb")
    # close the connection even if readBin() errors; the original called
    # close() explicitly and leaked the connection on the error path
    on.exit(close(ff))
    bytes <- readBin(
        ff, raw(),
        n = min(file.info(file)$size, maxsize)
    )
    # "text" means: no byte outside the allowed set was observed
    return(length(setdiff(bytes, text_bytes)) == 0)
}
Exceptions to this rule are that \code{x} can be a list of data frames if the output file format is an Excel .xlsx workbook, .Rdata file, or HTML file, or a variety of R objects if the output file format is RDS or JSON. See examples.) To export a list of data frames to multiple files, use \code{\link{export_list}} instead. #' @param file A character string naming a file. Must specify \code{file} and/or \code{format}. #' @param format An optional character string containing the file format, which can be used to override the format inferred from \code{file} or, in lieu of specifying \code{file}, a file with the symbol name of \code{x} and the specified file extension will be created. Must specify \code{file} and/or \code{format}. Shortcuts include: \dQuote{,} (for comma-separated values), \dQuote{;} (for semicolon-separated values), \dQuote{|} (for pipe-separated values), and \dQuote{dump} for \code{\link[base]{dump}}. #' @param \dots Additional arguments for the underlying export functions. This can be used to specify non-standard arguments. See examples. #' @return The name of the output file as a character string (invisibly). #' @details This function exports a data frame or matrix into a file with file format based on the file extension (or the manually specified format, if \code{format} is specified). #' #' The output file can be to a compressed directory, simply by adding an appropriate additional extensiont to the \code{file} argument, such as: \dQuote{mtcars.csv.tar}, \dQuote{mtcars.csv.zip}, or \dQuote{mtcars.csv.gz}. #' #' \code{export} supports many file formats. See the documentation for the underlying export functions for optional arguments that can be passed via \code{...} #' #' \itemize{ #' \item Comma-separated data (.csv), using \code{\link[data.table]{fwrite}} or, if \code{fwrite = TRUE}, \code{\link[utils]{write.table}} with \code{row.names = FALSE}. 
#' \item Pipe-separated data (.psv), using \code{\link[data.table]{fwrite}} or, if \code{fwrite = TRUE}, \code{\link[utils]{write.table}} with \code{sep = '|'} and \code{row.names = FALSE}. #' \item Tab-separated data (.tsv), using \code{\link[data.table]{fwrite}} or, if \code{fwrite = TRUE}, \code{\link[utils]{write.table}} with \code{row.names = FALSE}. #' \item SAS (.sas7bdat), using \code{\link[haven]{write_sas}}. #' \item SAS XPORT (.xpt), using \code{\link[haven]{write_xpt}}. #' \item SPSS (.sav), using \code{\link[haven]{write_sav}} #' \item SPSS compressed (.zsav), using \code{\link[haven]{write_sav}} #' \item Stata (.dta), using \code{\link[haven]{write_dta}}. Note that variable/column names containing dots (.) are not allowed and will produce an error. #' \item Excel (.xlsx), using \code{\link[openxlsx]{write.xlsx}}. Existing workbooks are overwritten unless \code{which} is specified, in which case only the specified sheet (if it exists) is overwritten. If the file exists but the \code{which} sheet does not, data are added as a new sheet to the existing workbook. \code{x} can also be a list of data frames; the list entry names are used as sheet names. #' \item R syntax object (.R), using \code{\link[base]{dput}} (by default) or \code{\link[base]{dump}} (if \code{format = 'dump'}) #' \item Saved R objects (.RData,.rda), using \code{\link[base]{save}}. In this case, \code{x} can be a data frame, a named list of objects, an R environment, or a character vector containing the names of objects if a corresponding \code{envir} argument is specified. #' \item Serialized R objects (.rds), using \code{\link[base]{saveRDS}}. In this case, \code{x} can be any serializable R object. 
#' \item "XBASE" database files (.dbf), using \code{\link[foreign]{write.dbf}} #' \item Weka Attribute-Relation File Format (.arff), using \code{\link[foreign]{write.arff}} #' \item Fixed-width format data (.fwf), using \code{\link[utils]{write.table}} with \code{row.names = FALSE}, \code{quote = FALSE}, and \code{col.names = FALSE} #' \item gzip comma-separated data (.csv.gz), using \code{\link[utils]{write.table}} with \code{row.names = FALSE} #' \item \href{https://github.com/csvy}{CSVY} (CSV with a YAML metadata header) using \code{\link[data.table]{fwrite}}. #' \item Apache Arrow Parquet (.parquet), using \code{\link[arrow]{write_parquet}} #' \item Feather R/Python interchange format (.feather), using \code{\link[feather]{write_feather}} #' \item Fast storage (.fst), using \code{\link[fst]{write.fst}} #' \item JSON (.json), using \code{\link[jsonlite]{toJSON}}. In this case, \code{x} can be a variety of R objects, based on class mapping conventions in this paper: \href{https://arxiv.org/abs/1403.2805}{https://arxiv.org/abs/1403.2805}. #' \item Matlab (.mat), using \code{\link[rmatio]{write.mat}} #' \item OpenDocument Spreadsheet (.ods), using \code{\link[readODS]{write_ods}}. (Currently only single-sheet exports are supported.) #' \item HTML (.html), using a custom method based on \code{\link[xml2]{xml_add_child}} to create a simple HTML table and \code{\link[xml2]{write_xml}} to write to disk. #' \item XML (.xml), using a custom method based on \code{\link[xml2]{xml_add_child}} to create a simple XML tree and \code{\link[xml2]{write_xml}} to write to disk. 
#' \item YAML (.yml), using \code{\link[yaml]{as.yaml}}
#' \item Clipboard export (on Windows and Mac OS), using \code{\link[utils]{write.table}} with \code{row.names = FALSE}
#' }
#'
#' When exporting a data set that contains label attributes (e.g., if imported from an SPSS or Stata file) to a plain text file, \code{\link{characterize}} can be a useful pre-processing step that records value labels into the resulting file (e.g., \code{export(characterize(x), "file.csv")}) rather than the numeric values.
#'
#' Use \code{\link{export_list}} to export a list of dataframes to separate files.
#'
#' @examples
#' library("datasets")
#' # specify only `file` argument
#' export(mtcars, f1 <- tempfile(fileext = ".csv"))
#'
#' \dontrun{
#' wd <- getwd()
#' setwd(tempdir())
#' # Stata does not recognize variables names with '.'
#' export(mtcars, f2 <- tempfile(fileext = ".dta"))
#'
#' # specify only `format` argument
#' f2 %in% tempdir()
#' export(mtcars, format = "stata")
#' "mtcars.dta" %in% dir()
#'
#' setwd(wd)
#' }
#' # specify `file` and `format` to override default format
#' export(mtcars, file = f3 <- tempfile(fileext = ".txt"), format = "csv")
#'
#' # export multiple objects to Rdata
#' export(list(mtcars = mtcars, iris = iris), f4 <- tempfile(fileext = ".rdata"))
#' export(c("mtcars", "iris"), f4)
#'
#' # export to non-data frame R object to RDS or JSON
#' export(mtcars$cyl, f5 <- tempfile(fileext = ".rds"))
#' export(list(iris, mtcars), f6 <- tempfile(fileext = ".json"))
#'
#' # pass arguments to underlying export function
#' export(mtcars, f7 <- tempfile(fileext = ".csv"), col.names = FALSE)
#'
#' # write data to .R syntax file and append additional data
#' export(mtcars, file = f8 <- tempfile(fileext = ".R"), format = "dump")
#' export(mtcars, file = f8, format = "dump", append = TRUE)
#' source(f8, echo = TRUE)
#'
#' # write to an Excel workbook
#' \dontrun{
#' ## export a single data frame
#' export(mtcars, f9 <- tempfile(fileext = ".xlsx"))
#'
#' ## export NAs to Excel as missing via args passed to `...`
#' mtcars$drat <- NA_real_
#' mtcars %>% export(f10 <- tempfile(fileext = ".xlsx"), keepNA = TRUE)
#'
#' ## export a list of data frames as worksheets
#' export(list(a = mtcars, b = iris), f11 <- tempfile(fileext = ".xlsx"))
#'
#' ## export, adding a new sheet to an existing workbook
#' export(iris, f12 <- tempfile(fileext = ".xlsx"), which = "iris")
#' }
#'
#' # write data to a zip-compressed CSV
#' export(mtcars, f13 <- tempfile(fileext = ".csv.zip"))
#'
#' # cleanup
#' unlink(f1)
#' # unlink(f2)
#' unlink(f3)
#' unlink(f4)
#' unlink(f5)
#' unlink(f6)
#' unlink(f7)
#' unlink(f8)
#' # unlink(f9)
#' # unlink(f10)
#' # unlink(f11)
#' # unlink(f12)
#' # unlink(f13)
#' @seealso \code{\link{.export}}, \code{\link{characterize}}, \code{\link{import}}, \code{\link{convert}}, \code{\link{export_list}}
#' @importFrom haven labelled
#' @export
export <- function(x, file, format, ...) {
    # Resolve the output format and (possibly compressed) file path from the
    # combination of `file` and `format` supplied by the caller. Scalar
    # conditions use `&&` (short-circuit) rather than vectorized `&`.
    if (missing(file) && missing(format)) {
        stop("Must specify 'file' and/or 'format'")
    } else if (!missing(file) && !missing(format)) {
        fmt <- tolower(format)
        cfile <- file
        f <- find_compress(file)
        file <- f$file
        compress <- f$compress
    } else if (!missing(file) && missing(format)) {
        # infer the format from the file extension (after stripping any
        # compression extension such as .zip/.tar)
        cfile <- file
        f <- find_compress(file)
        file <- f$file
        compress <- f$compress
        fmt <- get_ext(file)
    } else if (!missing(format)) {
        # no file name given: derive one from the exported object's name
        fmt <- get_type(format)
        file <- paste0(as.character(substitute(x)), ".", fmt)
        compress <- NA_character_
    }
    fmt <- get_type(fmt)
    outfile <- file
    if (fmt %in% c("gz", "gzip")) {
        # for gzip, the *inner* extension determines the real format and the
        # data are streamed through a gzfile() connection
        fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE))
        file <- gzfile(file, "w")
        on.exit(close(file))
    }
    # only a few formats can hold arbitrary (non-rectangular) R objects
    if (!is.data.frame(x) && !is.matrix(x)) {
        if (!fmt %in% c("xlsx", "html", "rdata", "rds", "json")) {
            stop("'x' is not a data.frame or matrix")
        }
    } else if (is.matrix(x)) {
        x <- as.data.frame(x)
    }
    # dispatch to the format-specific .export() method via the class of `file`
    class(file) <- c(paste0("rio_", fmt), class(file))
    .export(file = file, x = x, ...)
    if (!is.na(compress)) {
        # wrap the written file in the requested archive and remove the original
        cfile <- compress_out(cfile = cfile, filename = file, type = compress)
        unlink(file)
        return(invisible(cfile))
    }
    invisible(unclass(outfile))
}
rio/R/convert.R0000644000176200001440000000375214135736655013075 0ustar liggesusers#' @title Convert from one file format to another
#' @description This function constructs a data frame from a data file using \code{\link{import}} and uses \code{\link{export}} to write the data to disk in the format indicated by the file extension.
#' @param in_file A character string naming an input file.
#' @param out_file A character string naming an output file.
#' @param in_opts A named list of options to be passed to \code{\link{import}}.
#' @param out_opts A named list of options to be passed to \code{\link{export}}.
#' @return A character string containing the name of the output file (invisibly).
#' @examples
#' # create a file to convert
#' export(mtcars, dta_file <- tempfile(fileext = ".dta"))
#'
#' # convert Stata to CSV and open converted file
#' convert(dta_file, csv_file <- tempfile(fileext = ".csv"))
#' head(import(csv_file))
#'
#' # correct an erroneous file format
#' export(mtcars, csv_file2 <- tempfile(fileext = ".csv"), format = "tsv")
#' convert(csv_file2, csv_file, in_opts = list(format = "tsv"))
#'
#' # convert serialized R data.frame to JSON
#' export(mtcars, rds_file <- tempfile(fileext = ".rds"))
#' convert(rds_file, json_file <- tempfile(fileext = ".json"))
#'
#' # cleanup
#' unlink(csv_file)
#' unlink(csv_file2)
#' unlink(rds_file)
#' unlink(dta_file)
#' unlink(json_file)
#'
#' \dontrun{\donttest{
#' # convert from the command line:
#' ## Rscript -e "rio::convert('mtcars.dta', 'mtcars.csv')"
#' }}
#'
#' @seealso \href{https://lbraglia.github.io/}{Luca Braglia} has created a Shiny app called \href{https://github.com/lbraglia/rioweb}{rioweb} that provides access to the file conversion features of rio through a web browser.
#' @export
convert <- function(in_file, out_file, in_opts = list(), out_opts = list()) {
    # `out_file` has no default, so fail early with a message that names the
    # actual argument (the message previously said 'outfile', which does not
    # exist in the signature)
    if (missing(out_file)) {
        stop("'out_file' is missing with no default")
    }
    # import with `in_opts`, then hand the resulting data frame straight to
    # export() along with `out_opts`; the output file name is returned invisibly
    invisible(do.call("export", c(list(file = out_file,
                                       x = do.call("import", c(list(file = in_file), in_opts))),
                                  out_opts)))
}
rio/R/gather_attrs.R0000644000176200001440000000560614063055232014065 0ustar liggesusers#' @rdname gather_attrs
#' @title Gather attributes from data frame variables
#' @description \code{gather_attrs} moves variable-level attributes to the data frame level and \code{spread_attrs} reverses that operation.
#' @details \code{\link{import}} attempts to standardize the return value from the various import functions to the extent possible, thus providing a uniform data structure regardless of what import package or function is used. It achieves this by storing any optional variable-related attributes at the variable level (i.e., an attribute for \code{mtcars$mpg} is stored in \code{attributes(mtcars$mpg)} rather than \code{attributes(mtcars)}). \code{gather_attrs} moves these to the data frame level (i.e., in \code{attributes(mtcars)}). \code{spread_attrs} moves attributes back to the variable level.
#' @param x A data frame.
#' @return \code{x}, with variable-level attributes stored at the data frame level.
#' @examples
#' e <- try(import("http://www.stata-press.com/data/r13/auto.dta"))
#' if (!inherits(e, "try-error")) {
#'    str(e)
#'    g <- gather_attrs(e)
#'    str(attributes(e))
#'    str(g)
#' }
#' @seealso \code{\link{import}}, \code{\link{characterize}}
#' @importFrom stats setNames
#' @export
gather_attrs <- function(x) {
    if (!inherits(x, "data.frame")) {
        stop("'x' is not a data.frame")
    }
    dfattrs <- attributes(x)
    # a pre-existing data-frame-level 'label' would collide with the gathered
    # per-variable labels, so it is stored under 'title' instead
    if ("label" %in% names(dfattrs)) {
        names(dfattrs)[names(dfattrs) == "label"] <- "title"
    }
    varattrs <- rep(list(list()), length(x))
    for (i in seq_along(x)) {
        a <- attributes(x[[i]])
        # keep every variable attribute except structural ones
        varattrs[[i]] <- a[!names(a) %in% c("levels", "class")]
        attr(x[[i]], "label") <- NULL
        if (any(grepl("labelled", class(x[[i]])))) {
            x[[i]] <- haven::zap_labels(x[[i]])
        }
        # drop format attributes (e.g., 'format.stata') one at a time; the
        # previous single `attr(x[[i]], f) <- NULL` call errors when a column
        # carries more than one format attribute, because attr<- requires a
        # single attribute name
        f <- grep("^format", names(attributes(x[[i]])), value = TRUE)
        for (fname in f) {
            attr(x[[i]], fname) <- NULL
        }
        rm(f)
    }
    if (any(sapply(varattrs, length))) {
        # build one data-frame-level attribute per distinct variable attribute
        # name, each a list with one (possibly NULL) entry per column
        attrnames <- sort(unique(unlist(lapply(varattrs, names))))
        outattrs <- stats::setNames(lapply(attrnames, function(z) {
            stats::setNames(lapply(varattrs, `[[`, z), names(x))
        }), attrnames)
        attributes(x) <- c(dfattrs, outattrs)
    }
    x
}

#' @rdname gather_attrs
#' @export
spread_attrs <- function(x) {
    if (!inherits(x, "data.frame")) {
        stop("'x' is not a data.frame")
    }
    dfattrs <- attributes(x)
    # attributes that genuinely belong to the data frame itself stay put;
    # everything else is treated as a gathered per-variable attribute
    d_level_attrs <- names(dfattrs) %in% c("row.names", "class", "names", "notes", "title")
    varattrs <- dfattrs[!d_level_attrs]
    for (i in seq_along(x)) {
        a <- attributes(x[[i]])
        # re-attach the i-th entry of every gathered attribute to column i
        attributes(x[[i]]) <- c(a, lapply(varattrs, `[[`, i))
    }
    # restore the data-frame-level 'label' that gather_attrs() renamed 'title'
    if ("title" %in% names(dfattrs)) {
        names(dfattrs)[names(dfattrs) == "title"] <- "label"
    }
    attributes(x) <- dfattrs[d_level_attrs]
    x
}
rio/R/convert_google_url.R0000644000176200001440000000105612657050345015276 0ustar liggesusersconvert_google_url <- function(url, export_as = "csv") {
    ## convert a google sheets url to google csv export URL
    ## extract the doc-id and append /export?format = csv to it. (default)
    google_key <- regmatches(url, regexpr("[[:alnum:]_-]{30,}", url))
    if (grepl('gid=[[:digit:]]+', url)) {
        gidpart <- paste0(regmatches(url, regexpr("gid=[[:digit:]]+", url)))
    } else {
        ## no worksheet id in the URL: default to the first sheet
        gidpart <- "gid=0"
    }
    return(paste0('https://docs.google.com/spreadsheets/d/', google_key, '/export?', gidpart, '&format=', export_as))
}
rio/R/fwf2.R0000644000176200001440000000325513075640600012241 0ustar liggesusers#' @importFrom utils read.table
read.fwf2 <- function (file, widths, header = FALSE, sep = "\t", skip = 0,
                       n = -1, quote = "", stringsAsFactors = FALSE, ...) {
    # slice one raw line into fixed-width fields and re-join them with `sep`
    # so the result can be handed to read.table(); empty fields become NA
    doone <- function(x) {
        x <- substring(x, first, last)
        x[!nzchar(x)] <- NA_character_
        paste0(x, collapse = sep)
    }
    if (is.list(widths)) {
        # a list of width vectors means each record spans multiple lines
        recordlength <- length(widths)
        widths <- do.call("c", widths)
    } else {
        recordlength <- 1L
    }
    # negative widths mark columns to skip
    drop <- (widths < 0L)
    widths <- abs(widths)
    if (is.character(file)) {
        file <- file(file, "rt")
        on.exit(close(file), add = TRUE)
    } else if (!isOpen(file)) {
        open(file, "rt")
        on.exit(close(file), add = TRUE)
    }
    if (skip) readLines(file, n = skip)
    if (header) {
        # the header line is assumed to already be `sep`-separated; it is
        # prepended to the converted data lines below. (A stray
        # `text[1] <- headerline` here previously referenced `text` before it
        # existed and errored whenever header = TRUE; it has been removed.)
        headerline <- readLines(file, n = 1L)
    }
    raw <- readLines(file, n = n)
    nread <- length(raw)
    if (recordlength > 1L && nread %% recordlength) {
        # discard trailing lines that do not form a complete record
        raw <- raw[1L:(nread - nread %% recordlength)]
        warning(sprintf(ngettext(nread %% recordlength,
                                 "last record incomplete, %d line discarded",
                                 "last record incomplete, %d lines discarded"),
                        nread %% recordlength), domain = NA)
    }
    if (recordlength > 1L) {
        # merge the lines of each multi-line record into one string
        raw <- matrix(raw, nrow = recordlength)
        raw <- apply(raw, 2L, paste, collapse = "")
    }
    # first/last character positions of each retained field
    st <- c(1L, 1L + cumsum(widths))
    first <- st[-length(st)][!drop]
    last <- cumsum(widths)[!drop]
    if (header) {
        text <- c(headerline, sapply(raw, doone))
    } else {
        text <- sapply(raw, doone)
    }
    read.table(text = text, header = header, sep = sep, quote = quote,
               stringsAsFactors = stringsAsFactors, ...)
}
rio/R/characterize.R0000644000176200001440000000563014135737107014047 0ustar liggesusers#' @rdname characterize
#' @title Character conversion of labelled data
#' @description Convert labelled variables to character or factor
#' @param x A vector or data frame.
#' @param coerce_character A logical indicating whether to additionally coerce character columns to factor (in \code{factorize}). Default \code{FALSE}.
#' @param \dots additional arguments passed to methods
#' @details \code{characterize} converts a vector with a \code{labels} attribute of named levels into a character vector. \code{factorize} does the same but to factors. This can be useful at two stages of a data workflow: (1) importing labelled data from metadata-rich file formats (e.g., Stata or SPSS), and (2) exporting such data to plain text files (e.g., CSV) in a way that preserves information.
#' @examples
#' # vector method
#' x <- structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3))
#' characterize(x)
#' factorize(x)
#'
#' # data frame method
#' x <- data.frame(v1 = structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3)),
#'                 v2 = structure(c(1,0,0,1), labels = c("foo" = 0, "bar" = 1)))
#' str(factorize(x))
#' str(characterize(x))
#'
#' # comparison of exported file contents
#' import(export(x, csv_file <- tempfile(fileext = ".csv")))
#' import(export(factorize(x), csv_file))
#'
#' # cleanup
#' unlink(csv_file)
#'
#' @seealso \code{\link{gather_attrs}}
#' @export
characterize <- function(x, ...) {
    UseMethod("characterize")
}

#' @rdname characterize
#' @export
factorize <- function(x, ...) {
    UseMethod("factorize")
}

#' @rdname characterize
#' @export
characterize.default <- function(x, ...) {
    # retain variable label, if present
    if (!is.null(attributes(x)[["label"]])) {
        varlab <- attributes(x)[["label"]]
    } else {
        varlab <- NULL
    }
    # only vectors carrying a 'labels' attribute are converted (via the factor
    # representation); anything else passes through untouched
    if (!is.null(attributes(x)[["labels"]])) {
        x <- as.character(factorize(x, ...))
        if (!is.null(varlab)) {
            attr(x, "label") <- varlab
        }
    }
    return(x)
}

#' @rdname characterize
#' @export
characterize.data.frame <- function(x, ...) {
    # column-wise conversion, preserving the data frame's own attributes
    x[] <- lapply(x, characterize, ...)
    x
}

#' @rdname characterize
#' @export
factorize.default <- function(x, coerce_character = FALSE, ...) {
    # retain variable label, if present
    if (!is.null(attributes(x)[["label"]])) {
        varlab <- attributes(x)[["label"]]
    } else {
        varlab <- NULL
    }
    if (!is.null(attributes(x)[["labels"]])) {
        # labelled vector: values become levels, label names become labels
        x <- factor(x, attributes(x)[["labels"]], names(attributes(x)[["labels"]]), ...)
    } else if (is.character(x) && isTRUE(coerce_character)) {
        # optionally coerce plain character vectors using sorted unique values
        levs <- sort(unique(x))
        x <- factor(x, levs)
    }
    if (!is.null(varlab)) {
        attr(x, "label") <- varlab
    }
    return(x)
}

#' @rdname characterize
#' @export
factorize.data.frame <- function(x, ...) {
    # column-wise conversion, preserving the data frame's own attributes
    x[] <- lapply(x, factorize, ...)
    x
}
rio/R/rio.R0000644000176200001440000000325214135737001012163 0ustar liggesusers#' @docType package
#' @name rio
#' @title A Swiss-Army Knife for Data I/O
#' @description The aim of rio is to make data file input and output as easy as possible. \code{\link{export}} and \code{\link{import}} serve as a Swiss-army knife for painless data I/O for data from almost any file format by inferring the data structure from the file extension, natively reading web-based data sources, setting reasonable defaults for import and export, and relying on efficient data import and export packages. An additional convenience function, \code{\link{convert}}, provides a simple method for converting between file types.
#'
#' Note that some of rio's functionality is provided by \sQuote{Suggests} dependencies, meaning they are not installed by default. Use \code{\link{install_formats}} to make sure these packages are available for use.
#'
#' @examples
#' # export
#' library("datasets")
#' export(mtcars, csv_file <- tempfile(fileext = ".csv")) # comma-separated values
#' export(mtcars, rds_file <- tempfile(fileext = ".rds")) # R serialized
#' export(mtcars, sav_file <- tempfile(fileext = ".sav")) # SPSS
#'
#' # import
#' x <- import(csv_file)
#' y <- import(rds_file)
#' z <- import(sav_file)
#'
#' # convert sav (SPSS) to dta (Stata)
#' convert(sav_file, dta_file <- tempfile(fileext = ".dta"))
#'
#' # cleanup
#' unlink(c(csv_file, rds_file, sav_file, dta_file))
#'
#' @references
#' \href{https://github.com/Stan125/GREA}{GREA} provides an RStudio add-in to import data using rio.
#' @seealso \code{\link{import}}, \code{\link{import_list}}, \code{\link{export}}, \code{\link{export_list}}, \code{\link{convert}}, \code{\link{install_formats}}
NULL
rio/R/set_class.R0000644000176200001440000000131513276243310013350 0ustar liggesusers# Coerce an imported object to the class requested via `setclass`/`class`.
# With class = NULL the object is returned untouched; "data.table" and the
# tibble aliases ("tibble"/"tbl_df"/"tbl") dispatch to their converters; any
# other value falls through to a plain data.frame.
set_class <- function(x, class = NULL) {
    # no target class requested: hand the object back untouched
    if (is.null(class)) {
        return(x)
    }
    if ("data.table" %in% class) {
        # already a data.table? avoid a needless copy
        if (inherits(x, "data.table")) {
            return(x)
        }
        return(data.table::as.data.table(x))
    }
    if (any(c("tibble", "tbl_df", "tbl") %in% class)) {
        if (inherits(x, "tbl")) {
            return(x)
        }
        return(tibble::as_tibble(x))
    }
    out <- structure(x, class = "data.frame")
    # add row names in case `x` wasn't already a data frame (e.g., matlab list)
    if (!length(rownames(out))) {
        rownames(out) <- as.character(seq_len(length(out[, 1L, drop = TRUE])))
    }
    return(out)
}
rio/R/export_list.R0000644000176200001440000000611414135740451013751 0ustar liggesusers#' @title Export list of data frames to files
#' @description Use \code{\link{export}} to export a list of data frames to a vector of file names or a filename pattern.
#' @param x A list of data frames to be written to files.
#' @param file A character vector string containing a single file name with a \code{\%s} wildcard placeholder, or a vector of file paths for multiple files to be imported.
If \code{x} elements are named, these will be used in place of \code{\%s}, otherwise numbers will be used; all elements must be named for names to be used.
#' @param \dots Additional arguments passed to \code{\link{export}}.
#' @return The name(s) of the output file(s) as a character vector (invisibly).
#' @details \code{\link{export}} can export a list of data frames to a single multi-dataset file (e.g., an Rdata or Excel .xlsx file). Use \code{export_list} to export such a list to \emph{multiple} files.
#' @examples
#' library('datasets')
#' export(list(mtcars1 = mtcars[1:10,],
#'   mtcars2 = mtcars[11:20,],
#'   mtcars3 = mtcars[21:32,]),
#'   xlsx_file <- tempfile(fileext = ".xlsx")
#' )
#'
#' # import all worksheets
#' mylist <- import_list(xlsx_file)
#'
#' # re-export as separate named files
#' csv_files1 <- sapply(1:3, function(x) tempfile(fileext = paste0("-", x, ".csv")))
#' export_list(mylist, file = csv_files1)
#'
#' # re-export as separate files using a name pattern
#' export_list(mylist, file = csv_files2 <- tempfile(fileext = "%s.csv"))
#'
#' # cleanup
#' unlink(xlsx_file)
#' unlink(csv_files1)
#' unlink(csv_files2)
#'
#' @seealso \code{\link{import}}, \code{\link{import_list}}, \code{\link{export}}
#' @export
export_list <- function(x, file, ...) {
    # a data frame is almost certainly a mistake here; point the user at export()
    if (inherits(x, "data.frame")) {
        stop("'x' must be a list. Perhaps you want export()?")
    }
    if (is.null(file)) {
        stop("'file' must be a character vector")
    } else if (length(file) == 1L) {
        # single filename: treat it as a pattern with a %s placeholder
        if (!grepl("%s", file, fixed = TRUE)) {
            stop("'file' must have a %s placeholder")
        }
        if (is.null(names(x))) {
            outfiles <- sprintf(file, seq_along(x))
        } else {
            # bug fix: was `any(nchar(names(x))) == 0`, which compares a
            # logical to 0 and never detects partially named lists; the check
            # must flag any element whose name is empty
            if (any(nchar(names(x)) == 0)) {
                stop("All elements of 'x' must be named or all must be unnamed")
            }
            if (anyDuplicated(names(x))) {
                stop("Names of elements in 'x' are not unique")
            }
            outfiles <- sprintf(file, names(x))
        }
    } else {
        # explicit vector of filenames: must pair one-to-one with `x`
        if (length(x) != length(file)) {
            stop("'file' must be same length as 'x', or a single pattern with a %s placeholder")
        }
        if (anyDuplicated(file)) {
            stop("File names are not unique")
        }
        outfiles <- file
    }
    out <- list()
    for (f in seq_along(x)) {
        # export each element independently; a failure for one element is
        # reported as a warning rather than aborting the remaining exports
        out[[f]] <- try(export(x[[f]], file = outfiles[f], ...), silent = TRUE)
        if (inherits(out[[f]], "try-error")) {
            warning(sprintf("Export failed for element %d, filename: %s", f, outfiles[f]))
        }
    }
    invisible(outfiles)
}
rio/R/onLoad.R0000644000176200001440000000071013324631656012612 0ustar liggesusers.onLoad <- function(libname, pkgname) {
    # disable data.table's experimental decimal-separator auto-detection
    options(datatable.fread.dec.experiment = FALSE)
}

.onAttach <- function(libname, pkgname) {
    # in interactive sessions, tell the user which suggested format packages
    # are missing so import/export of those formats will work
    if (interactive()) {
        w <- uninstalled_formats()
        if (length(w)) {
            msg <- "The following rio suggested packages are not installed: %s\nUse 'install_formats()' to install them"
            packageStartupMessage(sprintf(msg, paste0(sQuote(w), collapse = ", ")))
        }
    }
}
rio/R/import.R0000644000176200001440000002656514135736503012716 0ustar liggesusers#' @rdname import
#' @title Import
#' @description Read in a data.frame from a file. Exceptions to this rule are Rdata, RDS, and JSON input file formats, which return the originally saved object without changing its class.
#' @param file A character string naming a file, URL, or single-file .zip or .tar archive.
#' @param format An optional character string code of file format, which can be used to override the format inferred from \code{file}.
Shortcuts include: \dQuote{,} (for comma-separated values), \dQuote{;} (for semicolon-separated values), and \dQuote{|} (for pipe-separated values). #' @template setclass #' @param which This argument is used to control import from multi-object files; as a rule \code{import} only ever returns a single data frame (use \code{\link{import_list}} to import multiple data frames from a multi-object file). If \code{file} is a compressed directory, \code{which} can be either a character string specifying a filename or an integer specifying which file (in locale sort order) to extract from the compressed directory. For Excel spreadsheets, this can be used to specify a sheet name or number. For .Rdata files, this can be an object name. For HTML files, it identifies which table to extract (from document order). Ignored otherwise. A character string value will be used as a regular expression, such that the extracted file is the first match of the regular expression against the file names in the archive. #' @param \dots Additional arguments passed to the underlying import functions. For example, this can control column classes for delimited file types, or control the use of haven for Stata and SPSS or readxl for Excel (.xlsx) format. See details below. #' @return A data frame. If \code{setclass} is used, this data frame may have additional class attribute values, such as \dQuote{tibble} or \dQuote{data.table}. #' @details This function imports a data frame or matrix from a data file with the file format based on the file extension (or the manually specified format, if \code{format} is specified). 
#' #' \code{import} supports the following file formats: #' #' \itemize{ #' \item Comma-separated data (.csv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} #' \item Pipe-separated data (.psv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{sep = '|'}, \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} #' \item Tab-separated data (.tsv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} #' \item SAS (.sas7bdat), using \code{\link[haven]{read_sas}}. #' \item SAS XPORT (.xpt), using \code{\link[haven]{read_xpt}} or, if \code{haven = FALSE}, \code{\link[foreign]{read.xport}}. #' \item SPSS (.sav), using \code{\link[haven]{read_sav}}. If \code{haven = FALSE}, \code{\link[foreign]{read.spss}} can be used. #' \item SPSS compressed (.zsav), using \code{\link[haven]{read_sav}}. #' \item Stata (.dta), using \code{\link[haven]{read_dta}}. If \code{haven = FALSE}, \code{\link[foreign]{read.dta}} can be used. #' \item SPSS Portable Files (.por), using \code{\link[haven]{read_por}}. #' \item Excel (.xls and .xlsx), using \code{\link[readxl]{read_excel}}. Use \code{which} to specify a sheet number. For .xlsx files, it is possible to set \code{readxl = FALSE}, so that \code{\link[openxlsx]{read.xlsx}} can be used instead of readxl (the default). #' \item R syntax object (.R), using \code{\link[base]{dget}} #' \item Saved R objects (.RData,.rda), using \code{\link[base]{load}} for single-object .Rdata files. Use \code{which} to specify an object name for multi-object .Rdata files. This can be any R object (not just a data frame). #' \item Serialized R objects (.rds), using \code{\link[base]{readRDS}}. This can be any R object (not just a data frame). 
#' \item Epiinfo (.rec), using \code{\link[foreign]{read.epiinfo}} #' \item Minitab (.mtp), using \code{\link[foreign]{read.mtp}} #' \item Systat (.syd), using \code{\link[foreign]{read.systat}} #' \item "XBASE" database files (.dbf), using \code{\link[foreign]{read.dbf}} #' \item Weka Attribute-Relation File Format (.arff), using \code{\link[foreign]{read.arff}} #' \item Data Interchange Format (.dif), using \code{\link[utils]{read.DIF}} #' \item Fortran data (no recognized extension), using \code{\link[utils]{read.fortran}} #' \item Fixed-width format data (.fwf), using a faster version of \code{\link[utils]{read.fwf}} that requires a \code{widths} argument and by default in rio has \code{stringsAsFactors = FALSE}. If \code{readr = TRUE}, import will be performed using \code{\link[readr]{read_fwf}}, where \code{widths} should be: \code{NULL}, a vector of column widths, or the output of \code{\link[readr]{fwf_empty}}, \code{\link[readr]{fwf_widths}}, or \code{\link[readr]{fwf_positions}}. #' \item gzip comma-separated data (.csv.gz), using \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE} #' \item \href{https://github.com/csvy}{CSVY} (CSV with a YAML metadata header) using \code{\link[data.table]{fread}}. #' \item Apache Arrow Parquet (.parquet), using \code{\link[arrow]{read_parquet}} #' \item Feather R/Python interchange format (.feather), using \code{\link[feather]{read_feather}} #' \item Fast storage (.fst), using \code{\link[fst]{read.fst}} #' \item JSON (.json), using \code{\link[jsonlite]{fromJSON}} #' \item Matlab (.mat), using \code{\link[rmatio]{read.mat}} #' \item EViews (.wf1), using \code{\link[hexView]{readEViews}} #' \item OpenDocument Spreadsheet (.ods), using \code{\link[readODS]{read_ods}}. Use \code{which} to specify a sheet number. #' \item Single-table HTML documents (.html), using \code{\link[xml2]{read_html}}. 
The data structure will only be read correctly if the HTML file can be converted to a list via \code{\link[xml2]{as_list}}. #' \item Shallow XML documents (.xml), using \code{\link[xml2]{read_xml}}. The data structure will only be read correctly if the XML file can be converted to a list via \code{\link[xml2]{as_list}}. #' \item YAML (.yml), using \code{\link[yaml]{yaml.load}} #' \item Clipboard import (on Windows and Mac OS), using \code{\link[utils]{read.table}} with \code{row.names = FALSE} #' \item Google Sheets, as Comma-separated data (.csv) #' \item GraphPad Prism (.pzfx) using \code{\link[pzfx]{read_pzfx}} #' } #' #' \code{import} attempts to standardize the return value from the various import functions to the extent possible, thus providing a uniform data structure regardless of what import package or function is used. It achieves this by storing any optional variable-related attributes at the variable level (i.e., an attribute for \code{mtcars$mpg} is stored in \code{attributes(mtcars$mpg)} rather than \code{attributes(mtcars)}). If you would prefer these attributes to be stored at the data.frame-level (i.e., in \code{attributes(mtcars)}), see \code{\link{gather_attrs}}. #' #' After importing metadata-rich file formats (e.g., from Stata or SPSS), it may be helpful to recode labelled variables to character or factor using \code{\link{characterize}} or \code{\link{factorize}} respectively. #' #' @note For csv and txt files with row names exported from \code{\link{export}}, it may be helpful to specify \code{row.names} as the column of the table which contain row names. See example below. 
#' @examples #' # create CSV to import #' export(iris, csv_file <- tempfile(fileext = ".csv")) #' #' # specify `format` to override default format #' export(iris, tsv_file <- tempfile(fileext = ".tsv"), format = "csv") #' stopifnot(identical(import(csv_file), import(tsv_file, format = "csv"))) #' #' # import CSV as a `data.table` #' stopifnot(inherits(import(csv_file, setclass = "data.table"), "data.table")) #' #' # pass arguments to underlying import function #' iris1 <- import(csv_file) #' identical(names(iris), names(iris1)) #' #' export(iris, csv_file2 <- tempfile(fileext = ".csv"), col.names = FALSE) #' iris2 <- import(csv_file2) #' identical(names(iris), names(iris2)) #' #' # set class for the response data.frame as "tbl_df" (from dplyr) #' stopifnot(inherits(import(csv_file, setclass = "tbl_df"), "tbl_df")) #' #' # non-data frame formats supported for RDS, Rdata, and JSON #' export(list(mtcars, iris), rds_file <- tempfile(fileext = ".rds")) #' li <- import(rds_file) #' identical(names(mtcars), names(li[[1]])) #' #' # cleanup #' unlink(csv_file) #' unlink(csv_file2) #' unlink(tsv_file) #' unlink(rds_file) #' #' @seealso \code{\link{import_list}}, \code{\link{.import}}, \code{\link{characterize}}, \code{\link{gather_attrs}}, \code{\link{export}}, \code{\link{convert}} #' @importFrom tools file_ext file_path_sans_ext #' @importFrom stats na.omit setNames #' @importFrom utils installed.packages untar unzip tar zip type.convert capture.output #' @importFrom curl curl_fetch_memory parse_headers #' @importFrom data.table as.data.table is.data.table #' @importFrom tibble as_tibble is_tibble #' @export import <- function(file, format, setclass, which, ...) 
{ if (grepl("^http.*://", file)) { file <- remote_to_local(file, format = format) } if ((file != "clipboard") && !file.exists(file)) { stop("No such file") } if (grepl("\\.zip$", file)) { if (missing(which)) { file <- parse_zip(file) } else { file <- parse_zip(file, which = which) } } else if(grepl("\\.tar", file)) { if (missing(which)) { which <- 1 } file <- parse_tar(file, which = which) } if (missing(format)) { fmt <- get_ext(file) if (fmt %in% c("gz", "gzip")) { fmt <- tools::file_ext(tools::file_path_sans_ext(file, compression = FALSE)) file <- gzfile(file) } else { fmt <- get_type(fmt) } } else { fmt <- get_type(format) } args_list <- list(...) class(file) <- c(paste0("rio_", fmt), class(file)) if (missing(which)) { x <- .import(file = file, ...) } else { x <- .import(file = file, which = which, ...) } # if R serialized object, just return it without setting object class if (inherits(file, c("rio_rdata", "rio_rds", "rio_json"))) { return(x) } # otherwise, make sure it's a data frame (or requested class) if (missing(setclass) || is.null(setclass)) { if ("data.table" %in% names(args_list) && isTRUE(args_list[["data.table"]])) { return(set_class(x, class = "data.table")) } else { return(set_class(x, class = "data.frame")) } } else { if ("data.table" %in% names(args_list) && isTRUE(args_list[["data.table"]])) { if (setclass != "data.table") { warning(sprintf("'data.table = TRUE' argument overruled. Using setclass = '%s'", setclass)) return(set_class(x, class = setclass)) } else { return(set_class(x, class = "data.table")) } } else { return(set_class(x, class = setclass)) } } } rio/R/import_methods.R0000644000176200001440000004007614141717535014443 0ustar liggesusers#' @importFrom data.table fread import_delim <- function(file, which = 1, fread = TRUE, sep = "auto", header = "auto", stringsAsFactors = FALSE, data.table = FALSE, ...) 
{ if (isTRUE(fread) & !inherits(file, "connection")) { arg_reconcile(data.table::fread, input = file, sep = sep, header = header, stringsAsFactors = stringsAsFactors, data.table = data.table, ..., .docall = TRUE) } else { if (isTRUE(fread) & inherits(file, "connection")) { message("data.table::fread() does not support reading from connections. Using utils::read.table() instead.") } if (missing(sep) || is.null(sep) || sep == "auto") { if (inherits(file, "rio_csv")) { sep <- "," } else if (inherits(file, "rio_csv2")) { sep <- ";" } else if (inherits(file, "rio_psv")) { sep <- "|" } else { sep <- "\t" } } if (missing(header) || is.null(header) || header == "auto") { header <- TRUE } arg_reconcile(utils::read.table, file=file, sep=sep, header=header, stringsAsFactors = stringsAsFactors, ..., .docall = TRUE) } } #' @export .import.rio_dat <- function(file, which = 1, ...) { message(sprintf("Ambiguous file format ('.dat'), but attempting 'data.table::fread(\"%s\")'", file)) import_delim(file = file, ...) } #' @export .import.rio_tsv <- function(file, sep = "auto", which = 1, fread = TRUE, dec = if (sep %in% c("\t", "auto")) "." else ",", ...) { import_delim(file = file, sep = sep, fread = fread, dec = dec, ...) } #' @export .import.rio_txt <- function(file, sep = "auto", which = 1, fread = TRUE, dec = if (sep %in% c(",", "auto")) "." else ",", ...) { import_delim(file = file, sep = sep, fread = fread, dec = dec, ...) } #' @export .import.rio_csv <- function(file, sep = ",", which = 1, fread = TRUE, dec = if (sep %in% c(",", "auto")) "." else ",", ...) { import_delim(file = file, sep = if (sep == ",") "auto" else sep, fread = fread, dec = dec, ...) } #' @export .import.rio_csv2 <- function(file, sep = ";", which = 1, fread = TRUE, dec = if (sep %in% c(";", "auto")) "," else ".", ...) { import_delim(file = file, sep = if (sep == ";") "auto" else sep, fread = fread, dec = dec, ...) 
}

#' @export
.import.rio_csvy <- function(file, sep = ",", which = 1, fread = TRUE, dec = if (sep %in% c(",", "auto")) "." else ",", yaml = TRUE, ...) {
    # CSVY = CSV with a YAML front-matter header; delegate to the shared
    # delimited-text reader with yaml handling enabled.
    import_delim(file = file, sep = if (sep == ",") "auto" else sep, fread = fread, dec = dec, yaml = yaml, ...)
}

#' @export
.import.rio_psv <- function(file, sep = "|", which = 1, fread = TRUE, dec = if (sep %in% c("|", "auto")) "." else ",", ...) {
    # Pipe-separated values; delegate to the shared delimited-text reader.
    import_delim(file = file, sep = if (sep == "|") "auto" else sep, fread = fread, dec = dec, ...)
}

#' @importFrom utils read.fwf
#' @export
.import.rio_fwf <- function(file, which = 1, widths, header = FALSE, col.names, comment = "#", readr = FALSE, progress = getOption("verbose", FALSE), ...) {
    # Fixed-width format import. 'widths' is mandatory; with readr = TRUE the
    # file is read via readr::read_fwf(), otherwise via the internal read.fwf2().
    # Negative entries in a numeric 'widths' mark columns to be skipped.
    # NOTE(review): 'header' is only honored in the read.fwf2() branch.
    if (missing(widths)) {
        stop("Import of fixed-width format data requires a 'widths' argument. See ? read.fwf().")
    }
    a <- list(...)
    if (isTRUE(readr)) {
        requireNamespace("readr")
        if (is.null(widths)) {
            # no widths given: let readr guess column positions from the data
            if (!missing(col.names)) {
                widths <- readr::fwf_empty(file = file, col_names = col.names)
            } else {
                widths <- readr::fwf_empty(file = file)
            }
            readr::read_fwf(file = file, col_positions = widths, progress = progress, comment = comment, ...)
        } else if (is.numeric(widths)) {
            if (any(widths < 0)) {
                if (!"col_types" %in% names(a)) {
                    col_types <- rep("?", length(widths))
                    # BUG FIX: negative widths mark skipped columns, but the
                    # previous code re-assigned "?" (guess) instead of the readr
                    # skip code "_", so no column was ever actually skipped.
                    col_types[widths < 0] <- "_"
                    col_types <- paste0(col_types, collapse = "")
                }
                if (!missing(col.names)) {
                    widths <- readr::fwf_widths(abs(widths), col_names = col.names)
                } else {
                    widths <- readr::fwf_widths(abs(widths))
                }
                readr::read_fwf(file = file, col_positions = widths, col_types = col_types, progress = progress, comment = comment, ...)
            } else {
                if (!missing(col.names)) {
                    widths <- readr::fwf_widths(abs(widths), col_names = col.names)
                } else {
                    widths <- readr::fwf_widths(abs(widths))
                }
                readr::read_fwf(file = file, col_positions = widths, progress = progress, comment = comment, ...)
            }
        } else if (is.list(widths)) {
            # A list that is not already a fwf_positions()-style spec (with
            # 'begin'/'end' columns) is converted via fwf_widths().
            # BUG FIX: the condition was `!c("begin", "end") %in% names(widths)`,
            # a length-2 condition in if() (an error as of R 4.2); use all().
            if (!all(c("begin", "end") %in% names(widths))) {
                if (!missing(col.names)) {
                    widths <- readr::fwf_widths(widths, col_names = col.names)
                } else {
                    widths <- readr::fwf_widths(widths)
                }
            }
            readr::read_fwf(file = file, col_positions = widths, progress = progress, comment = comment, ...)
        }
    } else {
        if (!missing(col.names)) {
            read.fwf2(file = file, widths = widths, header = header, col.names = col.names, ...)
        } else {
            read.fwf2(file = file, widths = widths, header = header, ...)
        }
    }
}

#' @export
.import.rio_r <- function(file, which = 1, ...) {
    # Deparsed R object written by dput()/dget().
    dget(file = file, ...)
}

#' @export
.import.rio_dump <- function(file, which = 1, envir = new.env(), ...) {
    # R dump() file: source it into a scratch environment, then return one
    # object from it ('which' may be a position or a name pattern).
    source(file = file, local = envir)
    if (length(list(...)) > 0) {
        # BUG FIX: the message claimed load() was used; this method uses source().
        warning("File imported using source. Arguments to '...' ignored.")
    }
    if (missing(which)) {
        if (length(ls(envir)) > 1) {
            warning("Dump file contains multiple objects. Returning first object.")
        }
        which <- 1
    }
    if (is.numeric(which)) {
        get(ls(envir)[which], envir)
    } else {
        get(ls(envir)[grep(which, ls(envir))[1]], envir)
    }
}

#' @export
.import.rio_rds <- function(file, which = 1, ...) {
    if (length(list(...))>0) {
        warning("File imported using readRDS. Arguments to '...' ignored.")
    }
    readRDS(file = file)
}

#' @export
.import.rio_rdata <- function(file, which = 1, envir = new.env(), ...) {
    # .Rdata/.rda archive: load into a scratch environment and return one
    # object ('which' may be a position or a name pattern).
    load(file = file, envir = envir)
    if (length(list(...)) > 0) {
        warning("File imported using load. Arguments to '...' ignored.")
    }
    if (missing(which)) {
        if (length(ls(envir)) > 1) {
            warning("Rdata file contains multiple objects. Returning first object.")
        }
        which <- 1
    }
    if (is.numeric(which)) {
        get(ls(envir)[which], envir)
    } else {
        get(ls(envir)[grep(which, ls(envir))[1]], envir)
    }
}

#' @export
.import.rio_rda <- .import.rio_rdata

#' @export
.import.rio_feather <- function(file, which = 1, ...) {
    # NOTE(review): '...' is accepted but not forwarded to read_feather().
    requireNamespace("feather")
    feather::read_feather(path = file)
}

#' @export
.import.rio_fst <- function(file, which = 1, ...) {
    requireNamespace("fst")
    fst::read.fst(path = file, ...)
}

#' @export
.import.rio_matlab <- function(file, which = 1, ...) {
    # NOTE(review): '...' is accepted but not forwarded to read.mat().
    requireNamespace("rmatio")
    rmatio::read.mat(filename = file)
}

#' @importFrom foreign read.dta
#' @importFrom haven read_dta
#' @export
.import.rio_dta <- function(file, haven = TRUE, convert.factors = FALSE,...) {
    # Stata .dta: haven (default) or foreign backend; attributes are
    # standardized to rio's variable-level convention either way.
    if (isTRUE(haven)) {
        arg_reconcile(haven::read_dta, file = file, ..., .docall = TRUE, .finish = standardize_attributes)
    } else {
        out <- arg_reconcile(foreign::read.dta, file = file, convert.factors = convert.factors, ..., .docall = TRUE)
        # drop foreign-specific bookkeeping attributes before standardizing
        attr(out, "expansion.fields") <- NULL
        attr(out, "time.stamp") <- NULL
        standardize_attributes(out)
    }
}

#' @importFrom foreign read.dbf
#' @export
.import.rio_dbf <- function(file, which = 1, as.is = TRUE, ...) {
    # as.is = TRUE keeps character columns as character (no factor coercion).
    foreign::read.dbf(file = file, as.is = as.is)
}

#' @importFrom utils read.DIF
#' @export
.import.rio_dif <- function(file, which = 1, ...) {
    utils::read.DIF(file = file, ...)
}

#' @importFrom haven read_sav
#' @importFrom foreign read.spss
#' @export
.import.rio_sav <- function(file, which = 1, haven = TRUE, to.data.frame = TRUE, use.value.labels = FALSE, ...) {
    # SPSS .sav: haven (default) or foreign backend.
    if (isTRUE(haven)) {
        standardize_attributes(haven::read_sav(file = file))
    } else {
        standardize_attributes(foreign::read.spss(file = file, to.data.frame = to.data.frame, use.value.labels = use.value.labels, ...))
    }
}

#' @importFrom haven read_sav
#' @export
.import.rio_zsav <- function(file, which = 1, ...) {
    # zlib-compressed SPSS .zsav; haven reads it like a .sav.
    standardize_attributes(haven::read_sav(file = file))
}

#' @importFrom haven read_por
#' @export
.import.rio_spss <- function(file, which = 1, ...) {
    # SPSS portable (.por)
    standardize_attributes(haven::read_por(file = file))
}

#' @importFrom haven read_sas
#' @export
.import.rio_sas7bdat <- function(file, which = 1, column.labels = FALSE, ...) {
    # NOTE(review): 'column.labels' is accepted but not used in this body.
    standardize_attributes(haven::read_sas(data_file = file, ...))
}

#' @importFrom foreign read.xport
#' @importFrom haven read_xpt
#' @export
.import.rio_xpt <- function(file, which = 1, haven = TRUE, ...) {
    # SAS XPORT: haven (default, attributes standardized) or foreign backend.
    if (isTRUE(haven)) {
        standardize_attributes(haven::read_xpt(file = file, ...))
    } else {
        foreign::read.xport(file = file)
    }
}

#' @importFrom foreign read.mtp
#' @export
.import.rio_mtp <- function(file, which = 1, ...) {
    # Minitab portable worksheet
    foreign::read.mtp(file = file, ...)
}

#' @importFrom foreign read.systat
#' @export
.import.rio_syd <- function(file, which = 1, ...) {
    # Systat file, coerced to a data.frame
    foreign::read.systat(file = file, to.data.frame = TRUE, ...)
}

#' @export
.import.rio_json <- function(file, which = 1, ...) {
    requireNamespace("jsonlite")
    jsonlite::fromJSON(txt = file, ...)
}

#' @importFrom foreign read.epiinfo
#' @export
.import.rio_rec <- function(file, which = 1, ...) {
    # Epi Info record file
    foreign::read.epiinfo(file = file, ...)
}

#' @importFrom foreign read.arff
#' @export
.import.rio_arff <- function(file, which = 1, ...) {
    # Weka ARFF; NOTE(review): '...' is accepted but not forwarded.
    foreign::read.arff(file = file)
}

#' @importFrom readxl read_xls
#' @export
.import.rio_xls <- function(file, which = 1, ...) {
    # Legacy Excel (.xls); 'which' selects the sheet. openxlsx-style argument
    # names are remapped to their readxl equivalents.
    requireNamespace("readxl")
    arg_reconcile(read_xls, path = file, ..., sheet = which, .docall = TRUE, .remap = c(colNames = 'col_names', na.strings = 'na'))
}

#' @importFrom readxl read_xlsx
#' @importFrom openxlsx read.xlsx
#' @export
.import.rio_xlsx <- function(file, which = 1, readxl = TRUE, ...) {
    # Excel (.xlsx): readxl (default) or openxlsx backend. Each branch remaps
    # the other backend's argument names so callers can use either spelling.
    if (isTRUE(readxl)) {
        requireNamespace("readxl")
        arg_reconcile(read_xlsx, path = file, ..., sheet = which, .docall = TRUE, .remap = c(colNames = 'col_names', na.strings = 'na'))
    } else {
        requireNamespace("openxlsx")
        arg_reconcile(read.xlsx, xlsxFile = file, ..., sheet = which, .docall = TRUE, .remap = c(col_names = 'colNames', na = 'na.strings'))
    }
}

#' @importFrom utils read.fortran
#' @export
.import.rio_fortran <- function(file, which = 1, style, ...) {
    # Fortran-formatted data; 'style' is the Fortran format specification.
    if (missing(style)) {
        stop("Import of Fortran format data requires a 'style' argument. See ? utils::read.fortran().")
    }
    utils::read.fortran(file = file, format = style, ...)
}

#' @export
.import.rio_ods <- function(file, which = 1, header = TRUE, ...) {
    # OpenDocument spreadsheet. readODS-style arguments supplied via '...'
    # ('sheet', 'col_names', 'path') are folded into 'which'/'header'/'file',
    # and anything read_ods() would not recognize is dropped with a warning.
    requireNamespace("readODS")
    "read_ods" <- readODS::read_ods
    a <- list(...)
    if ("sheet" %in% names(a)) {
        which <- a[["sheet"]]
        a[["sheet"]] <- NULL
    }
    if ("col_names" %in% names(a)) {
        header <- a[["col_names"]]
        a[["col_names"]] <- NULL
    }
    frml <- formals(readODS::read_ods)
    unused <- setdiff(names(a), names(frml))
    if ("path" %in% names(a)) {
        unused <- c(unused, 'path')
        a[["path"]] <- NULL
    }
    if (length(unused)>0) {
        warning("The following arguments were ignored for read_ods:\n", paste(unused, collapse = ', '))
    }
    a <- a[intersect(names(a), names(frml))]
    do.call("read_ods", c(list(path = file, sheet = which, col_names = header),a))
}

#' @importFrom utils type.convert
#' @export
.import.rio_xml <- function(file, which = 1, stringsAsFactors = FALSE, ...) {
    # Flat XML table: one child node per row, converted via as_list()/rbind.
    requireNamespace("xml2")
    x <- xml2::as_list(xml2::read_xml(unclass(file)))[[1L]]
    d <- do.call("rbind", c(lapply(x, unlist)))
    row.names(d) <- 1:nrow(d)
    d <- as.data.frame(d, stringsAsFactors = stringsAsFactors)
    # type.convert() but leave strings alone (revert columns it factorized)
    tc2 <- function(x) {
        out <- utils::type.convert(x, as.is = FALSE)
        if (is.factor(out)) {
            x
        } else {
            out
        }
    }
    if (!isTRUE(stringsAsFactors)) {
        d[] <- lapply(d, tc2)
    } else {
        d[] <- lapply(d, utils::type.convert)
    }
    d
}

# This is a helper function for .import.rio_html
extract_html_row <- function(x, empty_value) {
    # Both <th> and <td> are valid for table data, and <th> may be used when
    # there is an accented element (e.g. the first row of the table)
    to_extract <- x[names(x) %in% c("th", "td")]
    # Insert a value into cells that eventually will become empty cells (or they
    # will be dropped and the table will not be generated). Note that this more
    # complex code for finding the length is required because of html like
    # <td><br/></td>
    unlist_length <- sapply( lapply(to_extract, unlist), length )
    to_extract[unlist_length == 0] <- list(empty_value)
    unlist(to_extract)
}

#' @importFrom utils type.convert
#' @export
.import.rio_html <- function(file, which = 1, stringsAsFactors = FALSE, ..., empty_value = "") {
    # find all tables
    tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table")
    if (which > length(tables)) {
        stop(paste0("Requested table exceeds number of tables found in file (", length(tables),")!"))
    }
    x <- xml2::as_list(tables[[which]])
    if ("tbody" %in% names(x)) {
        # Note that "tbody" may be specified multiple times in a valid html table
        x <- unlist(x[names(x) %in% "tbody"], recursive=FALSE)
    }
    # loop row-wise over the table and then rbind()
    ## check for table header to use as column names
    col_names <- NULL
    if ("th" %in% names(x[[1]])) {
        col_names <- extract_html_row(x[[1]], empty_value=empty_value)
        # Drop the first row since column names have already been extracted from it.
        x <- x[-1]
    }
    out <- do.call("rbind", lapply(x, extract_html_row, empty_value=empty_value))
    colnames(out) <- if (is.null(col_names)) {
        paste0("V", seq_len(ncol(out)))
    } else {
        col_names
    }
    out <- as.data.frame(out, ..., stringsAsFactors = stringsAsFactors)
    # set row names
    rownames(out) <- 1:nrow(out)
    # type.convert() to numeric, etc.
    out[] <- lapply(out, utils::type.convert, as.is = TRUE)
    out
}

#' @export
.import.rio_yml <- function(file, which = 1, stringsAsFactors = FALSE, ...) {
    requireNamespace("yaml")
    as.data.frame(yaml::read_yaml(file, ...), stringsAsFactors = stringsAsFactors)
}

#' @export
.import.rio_eviews <- function(file, which = 1, ...) {
    # EViews workfile
    requireNamespace("hexView")
    hexView::readEViews(file, ...)
}

#' @export
.import.rio_clipboard <- function(file = "clipboard", which = 1, header = TRUE, sep = "\t", ...) {
    # Read a delimited table from the system clipboard via clipr.
    requireNamespace("clipr")
    clipr::read_clip_tbl(x = clipr::read_clip(), header = header, sep = sep, ...)
}

#' @export
.import.rio_pzfx <- function(file, which=1, ...)
{
    # GraphPad Prism .pzfx; 'which' selects the table.
    requireNamespace("pzfx")
    pzfx::read_pzfx(path=file, table=which, ...)
}

#' @export
.import.rio_parquet <- function(file, which = 1, as_data_frame = TRUE, ...) {
    requireNamespace("arrow")
    # BUG FIX: 'as_data_frame' was previously hard-coded to TRUE in the call,
    # silently ignoring the user-supplied argument; forward it instead.
    arrow::read_parquet(file = file, as_data_frame = as_data_frame, ...)
}
rio/R/arg_reconcile.R0000644000176200001440000001421714015452325014171 0ustar liggesusers
#' @title Reconcile an argument list to any function signature.
#'
#' @description Adapt an argument list to a function excluding arguments that
#'              will not be recognized by it, redundant arguments, and un-named
#'              arguments.
#'
#' @param fun A function to which an argument list needs to be adapted. Use
#'            the unquoted name of the function. If it's in a different
#'            package then the fully qualified unquoted name (e.g.
#'            \code{utils::read.table})
#' @param ... An arbitrary list of named arguments (unnamed ones will be
#'            ignored). Arguments in \code{.args} are overridden by
#'            arguments of the same name (if any) in \code{...}
#' @param .args A list or \code{alist} of named arguments, to be merged
#'              with \code{...}. Arguments in \code{.args} are overridden by
#'              arguments of the same name (if any) in \code{...}
#' @param .docall If set to \code{TRUE} will not only clean up the arguments
#'                but also execute \code{fun} with those arguments
#'                (\code{FALSE} by default) and return the results
#' @param .include Whitelist. If not empty, only arguments named here will be
#'                 permitted, and only if they satisfy the conditions implied by
#'                 the other arguments. Evaluated before \code{.remap}.
#' @param .exclude Blacklist. If not empty, arguments named here will be removed
#'                 even if they satisfy the conditions implied by the other
#'                 arguments. Evaluated before \code{.remap}.
#' @param .remap An optional named character vector or named list of character
#'               values for standardizing arguments that play the same role
#'               but have different names in different functions. Evaluated
#'               after \code{.exclude} and \code{.include}.
#' @param .warn Whether to issue a warning message (default) when invalid
#'              arguments need to be discarded.
#' @param .error If specified, should be the object to return in the event of
#'               error. This object will have the error as its
#'               \code{error} attribute. If not specified an ordinary error is
#'               thrown with an added hint on the documentation to read for
#'               troubleshooting. Ignored if \code{.docall} is \code{FALSE}.
#'               The point of doing this is fault-tolerance-- if this function
#'               is part of a lengthy process where you want to document an
#'               error but keep going, you can set \code{.error} to some
#'               object of a compatible type. That object will be returned in
#'               the event of error and will have as its \code{"error"}
#'               attribute the error object.
#' @param .finish A function to run on the result before returning it. Ignored
#'                if \code{.docall} is \code{FALSE}.
#'
#' @return Either a named list or the result of calling \code{fun} with the
#'         supplied arguments
#'
arg_reconcile <- function(fun, ..., .args = alist(), .docall = FALSE,
                          .include = c(), .exclude= c(), .remap = list(),
                          .warn = TRUE, .error = "default", .finish = identity) {
    # capture the formal arguments of the target function
    frmls <- formals(fun)
    # both freeform and an explicit list
    args <- match.call(expand.dots = FALSE)[["..."]]
    if (isTRUE(.docall)) {
        # force evaluation of promises in the caller's frame before reconciling
        for (ii in names(args)) {
            try(args[[ii]] <- eval(args[[ii]], parent.frame()))
        }
    }
    # get rid of duplicate arguments, with freeform arguments
    dupes <- names(args)[duplicated(names(args))]
    for (ii in dupes) {
        args[which(names(args) == ii)[-1]] <- NULL
    }
    # Merge ... with .args
    args <- c(args, .args)
    # Apply whitelist and blacklist. This step also removes duplicates _between_
    # the freeform (...) and pre-specified (.args) arguments, with ... versions
    # taking precedence over the .args versions. This is a consequence of the
    # intersect() and setdiff() operations and works even if there is no
    # blacklist nor whitelist
    if (!missing(.include)) {
        args <- args[intersect(names(args), .include)]
    }
    args <- args[setdiff(names(args), .exclude)]
    # if any remappings of one argument to another are specified, perform them
    for (ii in names(.remap)) {
        if (!.remap[[ii]] %in% names(args) && ii %in% names(args)) {
            args[[.remap[[ii]] ]] <- args[[ii]]
        }
    }
    # remove any unnamed arguments
    args[names(args) == ""] <- NULL
    # if the target function doesn't have "..." as an argument, check to make sure
    # only recognized arguments get passed, optionally with a warning
    if (!"..." %in% names(frmls)) {
        unused <- setdiff(names(args), names(frmls))
        if (length(unused)>0){
            if (isTRUE(.warn)) {
                warning("The following arguments were ignored for ", deparse(substitute(fun)), ":\n", paste(unused, collapse = ", "))
            }
            args <- args[intersect(names(args), names(frmls))]
        }
    }
    # the final, cleaned-up arguments either get returned as a list or used on the
    # function, depending on how .docall is set
    if (!isTRUE(.docall)) {
        return(args)
    } else {
        # run the function and return the result case
        oo <- try(do.call(fun, args), silent = TRUE)
        if (!inherits(oo, "try-error")) {
            return(.finish(oo))
        } else {
            # construct an informative error... eventually there will be more
            # detailed info here
            errorhint <- paste('\nThis error was generated by: ',
                               deparse(match.call()$fun),
                               '\nWith the following arguments:\n',
                               gsub('^list\\(|\\)$', '',
                                    paste(deparse(args, control=c('delayPromises')), collapse='\n')))
            if (missing(.error)) {
                stop(attr(oo, "condition")$message, errorhint)
            } else {
                # fault-tolerant path: return the caller-supplied sentinel with
                # the error object attached as its "error" attribute
                attr(.error, "error") <- oo
                return(.error)
            }
        }
    }
}
rio/R/extensions.R0000644000176200001440000000747413577132026013602 0ustar liggesusers
#' @rdname extensions
#' @aliases extensions .import .export
#' @title rio Extensions
#' @description Writing Import/Export Extensions for rio
#' @param file A character string naming a file.
#' @param x A data frame or matrix to be written into a file.
#' @param \dots Additional arguments passed to methods.
#' @return For \code{.import}, an R data.frame. For \code{.export}, \code{file}, invisibly.
#' @details rio implements format-specific S3 methods for each type of file that can be imported from or exported to. This happens via internal S3 generics, \code{.import} and \code{.export}. It is possible to write new methods like with any S3 generic (e.g., \code{print}).
#'
#' As an example, \code{.import.rio_csv} imports from a comma-separated values file. If you want to produce a method for a new filetype with extension \dQuote{myfile}, you simply have to create a function called \code{.import.rio_myfile} that implements a format-specific importing routine and returns a data.frame. rio will automatically recognize new S3 methods, so that you can then import your file using: \code{import("file.myfile")}.
#'
#' As general guidance, if an import method creates many attributes, these attributes should be stored --- to the extent possible --- in variable-level attributes fields. These can be \dQuote{gathered} to the data.frame level by the user via \code{\link{gather_attrs}}.
#' @seealso \code{\link{import}}, \code{\link{export}}
#' @export
.import <- function(file, ...){
    # internal S3 generic; dispatch happens on the rio_* class of 'file'
    UseMethod('.import')
}

#' @rdname extensions
#' @importFrom tools file_ext
#' @export
.import.default <- function(file, ...){
    # Fallback import method: no rio handler exists for this extension, so stop
    # with an error that, where possible, names the external package/function
    # able to read the format.
    x <- gettext("%s format not supported. Consider using the '%s()' function")
    # xA is used for formats whose support is exported by another package
    xA <- gettext("Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again.")
    fmt <- tools::file_ext(file)
    # map known-but-unsupported extensions to a helpful suggestion
    out <- switch(fmt,
                  bean = sprintf(xA, fmt, "ledger", "ledger"),
                  beancount = sprintf(xA, fmt, "ledger", "ledger"),
                  bib = sprintf(x, fmt, "bib2df::bib2df"),
                  bmp = sprintf(x, fmt, "bmp::read.bmp"),
                  doc = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"),
                  docx = sprintf(x, fmt, "docxtractr::docx_extract_all_tbls"),
                  gexf = sprintf(x, fmt, "rgexf::read.gexf"),
                  gnumeric = sprintf(x, fmt, "gnumeric::read.gnumeric.sheet"),
                  hledger = sprintf(xA, fmt, "ledger", "ledger"),
                  jpeg = sprintf(x, fmt, "jpeg::readJPEG"),
                  jpg = sprintf(x, fmt, "jpeg::readJPEG"),
                  ledger = sprintf(xA, fmt, "ledger", "ledger"),
                  npy = sprintf(x, fmt, "RcppCNPy::npyLoad"),
                  qs = sprintf(x, fmt, "qs::qread"),
                  pdf = sprintf(x, fmt, "tabulizer::extract_tables"),
                  png = sprintf(x, fmt, "png::readPNG"),
                  sdmx = sprintf(x, fmt, "sdmx::readSDMX"),
                  sss = sprintf(x, fmt, "sss::read.sss"),
                  tiff = sprintf(x, fmt, "tiff::readTIFF"),
                  gettext("Format not supported"))
    stop(out, call. = FALSE)
}

#' @rdname extensions
#' @export
.export <- function(file, x, ...){
    # internal S3 generic; dispatch happens on the rio_* class of 'file'
    UseMethod(".export")
}

#' @rdname extensions
#' @importFrom tools file_ext
#' @export
.export.default <- function(file, x, ...){
    # Fallback export method: mirror of .import.default for writing.
    x <- gettext("%s format not supported. Consider using the '%s()' function")
    fmt <- tools::file_ext(file)
    out <- switch(fmt,
                  gexf = sprintf(x, fmt, "rgexf::write.gexf"),
                  jpg = sprintf(x, fmt, "jpeg::writeJPEG"),
                  npy = sprintf(x, fmt, "RcppCNPy::npySave"),
                  png = sprintf(x, fmt, "png::writePNG"),
                  qs = sprintf(x, fmt, "qs::qsave"),
                  tiff = sprintf(x, fmt, "tiff::writeTIFF"),
                  xpt = sprintf(x, fmt, "SASxport::write.xport"),
                  gettext("Format not supported"))
    stop(out, call. = FALSE)
}
rio/R/standardize_attributes.R0000644000176200001440000000402214014451034016141 0ustar liggesusers
# Normalize metadata attributes produced by the various import backends
# (haven, foreign) into rio's convention: variable-level "label"/"labels"/
# "format"/"type" attributes, with backend-specific data.frame-level
# attributes removed.
standardize_attributes <- function(dat) {
    out <- dat
    a <- attributes(out)
    if ("variable.labels" %in% names(a)) {
        # foreign::read.spss stores labels as "variable.labels"; rename to the
        # Stata-style "var.labels" so both are handled by one code path below
        names(a)[names(a) == "variable.labels"] <- "var.labels"
        a$var.labels <- unname(a$var.labels)
    }
    # cleanup import
    attr(out, "var.labels") <- NULL # Stata
    attr(out, "variable.labels") <- NULL # SPSS
    attr(out, "formats") <- NULL
    attr(out, "types") <- NULL
    attr(out, "label.table") <- NULL
    for (i in seq_along(out)) {
        if ("value.labels" %in% names(attributes(out[[i]]))) {
            # foreign-style "value.labels" becomes haven-style "labels"
            attr(out[[i]], "labels") <- attr(out[[i]], "value.labels", exact = TRUE)
            attr(out[[i]], "value.labels") <- NULL
        }
        if (any(grepl("haven_labelled", class(out[[i]])))) {
            # strip haven's labelled class but keep the attributes
            out[[i]] <- unclass(out[[i]])
        }
        if ("var.labels" %in% names(a)) {
            attr(out[[i]], "label") <- a$var.labels[i]
        }
        # NOTE(review): "$format" is used as a regex here; an unescaped '$' is
        # an end-of-string anchor, so this pattern may never match format
        # attributes (e.g. "formats"/"format.stata") — possibly intended as
        # fixed = TRUE. Confirm upstream before relying on it.
        if (any(grepl("$format", names(a)))) {
            attr(out[[i]], "format") <- a[[grep("$format", names(a))[1L]]][i]
        }
        if ("types" %in% names(a)) {
            attr(out[[i]], "type") <- a$types[i]
        }
        if ("val.labels" %in% names(a) && (a$val.labels[i] != "")) {
            attr(out[[i]], "labels") <- a$label.table[[a$val.labels[i]]]
        }
    }
    out
}

# Inverse of the standardization above, for export via haven: columns carrying
# "label"/"labels" attributes (or factors) are re-wrapped as haven::labelled.
restore_labelled <- function(x) {
    # restore labelled variable classes
    x[] <- lapply(x, function(v) {
        if (is.factor(v)) {
            # factors become labelled numerics with one label per level
            haven::labelled(
                x = as.numeric(v),
                labels = stats::setNames(seq_along(levels(v)), levels(v)),
                label = attr(v, "label", exact = TRUE)
            )
        } else if (!is.null(attr(v, "labels", exact = TRUE)) || !is.null(attr(v, "label", exact = TRUE))) {
            haven::labelled(
                x = v,
                labels = attr(v, "labels", exact = TRUE),
                label = attr(v, "label", exact = TRUE)
            )
        } else {
            v
        }
    })
    x
}
rio/NEWS.md0000644000176200001440000004732514142035332012151 0ustar liggesusers# rio 0.5.28

* Various fixes to tests, examples, and documentation for CRAN.
* Temporarily disabled some tests that failed on Mac M1s.

# rio 0.5.27

* Documentation fixes for CRAN.

# rio 0.5.26

* Added support for "zsav" format.
(#273) # rio 0.5.25 * Modified tests per email request from CRAN. * Added `coerce_character` argument (default FALSE) to `factorize()` to enable coercing character columns to factor. (#278) # rio 0.5.24 * Fix handling of "label" and "labels" attributes when exporting using haven methods (SPSS, Stata, SAS). (#268, h/t Ruben Arslan) * Fix (a different bug?) handling factors by haven::labelled() (#271, Alex Bokov) * HTML import can now handle multiple tbody elements within a single table, a th element in a non-header row, and empty elements in either the header or data. (#260, #263, #264 Bill Denney) # rio 0.5.23 * CSVY support is now provided by `data.table::fread()` and `data.table::fwrite()`, providing significant performance gains. * Added an internal `arg_reconcile()` function to streamline the task of removing/renaming arguments for compatibility with various functions (#245, Alex Bokov) # rio 0.5.22 * Added an `export_list()` function to write a list of data frames to multiple files using a vector of file names or a file pattern. (#207, h/t Bill Denney) * Added an `is_file_text()` function to determine whether a file is in a plain-text format. Optionally narrower subsets of characters can be specified, e.g. ASCII. (#236 Alex Bokov) # rio 0.5.21 * Added support for Apache Arrow (Parquet) files. (#214) * Fix dropping of variable label in `characterize()` and `factorize()`. (#204, h/t David Armstrong) * `import_list()` now returns a `filename` attribute for each data frame in the list (when importing from multiple files), in order to distinguish files with the same base name but different extensions (e.g., `import_list(c("foo.csv", "foo.tsv"))`). (#208, h/t Vimal Rawat) * Import of DBF files now does not convert strings to factors. (#202, h/t @jllipatz) * Implemented `import()` method for .dump R files. 
(#240) # rio 0.5.20 * Additional pointers were added to indicate how to load .doc, .docx, and .pdf files (#210, h/t Bill Denney) * Ensure that tests only run if the corresponding package is installed. (h/t Bill Denney) * Escape ampersands for html and xml export (#234 Alex Bokov) # rio 0.5.19 * Fix behavior of `export()` to plain text files when `append = TRUE` (#201, h/t Julián Urbano) * `import_list()` now preserve names of Excel sheets, etc. when the 'which' argument is specified. (#162, h/t Danny Parsons) * Modify message and errors when working with unrecognized file formats. (#195, h/t Trevor Davis) * Add support for GraphPad Prism .pzfx files (#205, h/t Bill Denney) # rio 0.5.18 * Adjust `import()`/`export()` for JSON file formats to allow non-data frame objects. Behavior modeled after RDS format. (#199 h/t Nathan Day) # rio 0.5.17 * Fix `the condition has length > 1 and only the first element will be used` warning in `gather_attributes()`. (#196, h/t Ruben Arslan) # rio 0.5.16 * Fix `the condition has length > 1 and only the first element will be used` warning in `standardize_attributes()`. # rio 0.5.15 * Modified some further code to produce compatibility with haven 2.0.0 release. (#188) * Add some additional function suggestions for the ledger package. (#190, h/t Trevor Davis) # rio 0.5.14 * Changes to `gather_attrs()` for haven 2.0.0 release. (#188) * Fixed a bug that generated a superfluous warning in `import()`. * Some style guide changes to code. # rio 0.5.13 * Allow `import()` of objects other than data frames from R-serialized (.rds and .rdata) files. Also, export of such objects to .rds files is supported, as previously intended. (#183, h/t Nicholas Jhirad) * Added (suggests) support for import of EViews files using `hexView::readEViews()`. (#163, h/t Boris Demeshev) # rio 0.5.12 * Add better package specification to `install_formats()` so that it reads from the `Suggests` field of the `DESCRIPTION` file. 
* Edit header of `README.Rmd` (and thusly `README.md`) to stop complaining about a lack of title field. * Fix typo in `CONTRIBUTING.md` (line said "three arguments", but only listed two). # rio 0.5.11 * Fixed a bug in `import()` wherein matlab files were ignored unless `format` was specified, as well as a related bug that made importing appear to fail for matlab files. (#171) * Fixed a bug in `export()` wherein `format` was ignored. (#99, h/t Sebastian Sauer) * Fixed a bug in the importing of European-style semicolon-separated CSV files. Added a test to ensure correct behavior. (#159, h/t Kenneth Rose) * Updated documentation to reflect recent changes to the xlsx `export()` method. (#156) # rio 0.5.10 * Removed some csvy-related tests, which were failing on CRAN. # rio 0.5.9 * Removed longstanding warnings from the tests of `export()` to fixed-width format. # rio 0.5.8 * Export the `get_ext()` function. (#169) * Fix a bug related to an xml2 bug (#168, h/t Jim Hester) * `import_list()` gains improved file name handling. (#164, h/t Ruaridh Williamson) * Removed the `overwrite` argument from `export()` method for xlsx files. Instead, existing workbooks are always overwritten unless which is specified, in which case only the specified sheet (if it exists) is overwritten. If the file exists but the `which` sheet does not, the data are added as a new sheet to the existing workbook. (#156) # rio 0.5.7 * Import of files with the ambiguous .dat extension, which are typically text-delimited files, are now passed to `data.table::fread()` with a message. Export to the format remains unsupported. (#98, #155) * Added support for export to SAS XPORT format (via `haven::write_xpt()`). (#157) * Switched default import package for SAS XPORT format to `haven::read_xpt()` with a `haven = FALSE` toggle restoring the previous default behavior using `foreign::read.xpt()`. 
(#157) # rio 0.5.6 * Fixed a bug in `import()` from compressed files wherein the `which` argument did not necessarily return the correct file if >=2 files in the compressed folder. * Tweak handling of `export()` to xlsx workbooks when `which` is specified. (#156) # rio 0.5.5 * Expanded test suite and increased test coverage, fixing a few tests that were failing on certain CRAN builds. # rio 0.5.4 * New functions `characterize()` and `factorize()` provide methods for converting "labelled" variables (e.g., from Stata or SPSS) into character or factor variables using embedded metadata. This can also be useful for exporting a metadata-rich file format into a plain text file. (#153) # rio 0.5.3 * Fixed a bug in writing to .zip and .tar archives related to absolute file paths. * Fixed some small bugs in `import_list()` and added tests for behavior. * Add .bib as known-unsupported format via `bib2df::bib2df()`. * Expanded test coverage. # rio 0.5.2 * Fixed a bug in `.import.rio_xlsx()` when `readxl = FALSE`. (#152, h/t Danny Parsons) * Added a new function `spread_attrs()` that reverses the `gather_attrs()` operation. * Expanded test coverage. # rio 0.5.1 * `export()` now sets variables with a "labels" attribute to **haven**'s "labelled" class. # rio 0.5.0 * CRAN Release. * Restored import of **openxlsx** so that writing to xlsx is supported on install. (#150) # rio 0.4.28 * Improved documentation of mapping between file format support and the packages used for each format. (#151, h/t Patrick Kennedy) * `import_list()` now returns a `NULL` entry for any failed imports, with a warning. (#149) * `import_list()` gains additional arguments `rbind_fill` and `rbind_label` to control rbind-ing behavior. (#149) # rio 0.4.27 * Import to and export from the clipboard now rely on `clipr::read_clip()` and `clipr::write_clip()`, respectively, thus (finally) providing Linux support. (#105, h/t Matthew Lincoln) * Added an `rbind` argument to `import_list()`.
(#149) * Added a `setclass` argument to `import_list()`, a la the same in `import()`. * Switched `requireNamespace()` calls to `quietly = TRUE`. # rio 0.4.26 * Further fixes to .csv.gz import/export. (#146, h/t Trevor Davis) # rio 0.4.25 * Remove unnecessary **urltools** dependency. * New function `import_list()` returns a list of data frames from a multi-object Excel Workbook, .Rdata file, zip directory, or HTML file. (#126, #129) * `export()` can now write a list of data frames to an Excel (.xlsx) workbook. (#142, h/t Jeremy Johnson) * `export()` can now write a list of data frames to an HTML (.html) file. # rio 0.4.24 * Verbosity of `export(format = "fwf")` now depends on `options("verbose")`. * Fixed various errors, warnings, and messages in fixed-width format tests. * Modified defaults and argument handling in internal function `read_delim()`. * Fixed handling of "data.table", "tibble", and "data.frame" classes in `set_class()`. (#144) # rio 0.4.23 * Moved all non-critical format packages to Suggests, rather than Imports. (#143) * Added support for Matlab formats. (#78, #98) * Added support for fst format. (#138) # rio 0.4.22 * Rearranged README. * Bumped readxl dependency to `>= 0.1.1` (#130, h/t Yongfa Chen) * Pass explicit `excel_format` arguments when using **readxl** functions. (#130) * Google Spreadsheets can now be imported using any of the allowed formats (CSV, TSV, XLSX, ODS). * Added support for writing to ODS files via `readODS::write_ods()`. (#96) # rio 0.4.21 * Handle HTML tables with `` elements. (h/t Mohamed Elgoussi) # rio 0.4.20 * Fixed a bug in the `.import.rio_xls()` and `.import.rio_xlsx()` where the `sheet` argument would return an error. # rio 0.4.19 * Fixed a bug in the import of delimited files when `fread = FALSE`. (#133, h/t Christopher Gandrud) # rio 0.4.18 * With new data.table release, export using `fwrite()` is now the default for text-based file formats.
# rio 0.4.17 * Fixed a bug in `.import.rio_xls()` wherein the `which` argument was ignored. (h/t Mohamed Elgoussi) # rio 0.4.16 * Added support for importing from multi-table HTML files using the `which` argument. (#126) # rio 0.4.15 * Improved behavior of `import()` and `export()` with respect to unrecognized file types. (#124, #125, h/t Jason Becker) * Added explicit tests of the S3 extension mechanism for `.import()` and `.export()`. * Attempt to recognize compressed but non-archived file formats (e.g., ".csv.gz"). (#123, h/t trevorld) # rio 0.4.14 * Update import and export methods to use new xml2 for XML and HTML export. (#86) # rio 0.4.13 * Fix failing tests related to stricter variable name handling for Stata files in development version of haven. (#113, h/t Hadley Wickham) * Added support for export of .sas7bdat files via haven (#116) * Restored support for import from SPSS portable via haven (#116) * Updated import methods to reflect changed formal argument names in haven. (#116) * Converted to roxygen2 documentation and made NEWS an explicit markdown file. # rio 0.4.12 * rio sets `options(datatable.fread.dec.experiment=FALSE)` during onLoad to address a Unix-specific locale issue. # rio 0.4.11 * Note unsupported NumPy i/o via RcppCNPy. (#112) * Fix import of European-style CSV files (sep = "," and sep2 = ";"). (#106, #107, h/t Stani Stadlmann) # rio 0.4.10 * Changed feather Imports to Suggests to make rio installable on older R versions. (#104) * Noted new RStudio add-in, GREA, that uses rio. (#109) * Migrated CSVY-related code to separate package (https://github.com/leeper/csvy/). (#111) # rio 0.4.9 * Removed unnecessary error in xlsx imports. (#103, h/t Kevin Wright) # rio 0.4.8 * Fixed a bug in the handling of "labelled" class variables imported from haven. (#102, h/t Pierre LaFortune) # rio 0.4.7 * Improved use of the `sep` argument for import of delimited files. 
(#99, h/t Danny Parsons) * Removed support for import of SPSS Portable (.por) files, given deprecation from haven. (#100) # rio 0.4.5 * Fixed other tests to remove (unimportant) warnings. * Fixed a failing test of file compression that was found in v0.4.3 on some platforms. # rio 0.4.3 * Improved, generalized, tested, and expanded documentation of `which` argument in `import()`. * Expanded test suite and made some small fixes. # rio 0.4.2 * Added support to import and export to `feather` data serialization format. (#88, h/t Jason Becker) # rio 0.4.1 * Fixed behavior of `gather_attrs()` on a data.frame with no attributes to gather. (#94) * Removed unrecognized file format error for import from compressed files. (#93) # rio 0.4.0 * CRAN Release. # rio 0.3.19 * Added a `gather_attrs()` function that moves variable-level attributes to the data.frame level. (#80) * Added preliminary support for import from HTML tables (#86) # rio 0.3.18 * Added support for export to HTML tables. (#86) # rio 0.3.17 * Fixed a bug in import from remote URLs with incorrect file extensions. # rio 0.3.16 * Added support for import from fixed-width format files via `readr::read_fwf()` with a specified `widths` argument. This may enable faster import of these types of files and provides a base-like interface for working with readr. (#48) # rio 0.3.15 * Added support for import from and export to yaml. (#83) * Fixed a bug when reading from an uncommented CSVY yaml header that contained single-line comments. (#84, h/t Tom Aldenberg) # rio 0.3.14 * Diagnostic messages were cleaned up to facilitate translation. (#57) # rio 0.3.12 * `.import()` and `.export()` are now exported S3 generics and documentation has been added to describe how to write rio extensions for new file types. An example of this functionality is shown in the new suggested "rio.db" package. 
# rio 0.3.11 * `import()` now uses xml2 to read XML structures and `export()` uses a custom method for writing to XML, thereby negating dependency on the XML package. (#67) * Enhancements were made to import and export of CSVY to store attribute metadata as variable-level attributes (like imports from binary file formats). * `import()` gains a `which` argument that is used to select which file to return from within a compressed tar or zip archive. * Export to tar now tries to correct for bugs in `tar()` that are being fixed in base R via [PR#16716](https://bugs.r-project.org/show_bug.cgi?id=16716). # rio 0.3.10 * Fixed a bug in `import()` (introduced in #62, 7a7480e5) that prevented import from clipboard. (h/t Kevin Wright) * `export()` returns a character string. (#82) # rio 0.3.9 * The use of `import()` for SAS, Stata, and SPSS files has been streamlined. Regardless of whether the `haven = TRUE` argument is used, the data.frame returned by `import()` should now be (nearly) identical, with all attributes stored at the variable rather than data.frame level. This is a non-backwards compatible change. (#80) # rio 0.3.8 * Fixed error in export to CSVY with a commented yaml header. (#81, h/t Andrew MacDonald) # rio 0.3.7 * `export()` now allows automatic file compression as tar, gzip, or zip using the `file` argument (e.g., `export(iris, "iris.csv.zip")`). # rio 0.3.6 * Expanded verbosity of `export()` for fixed-width format files and added a commented header containing column class and width information. * Exporting factors to fixed-width format now saves those values as integer rather than numeric. * Expanded test suite and separated tests into format-specific files. (#51) # rio 0.3.5 * Export of CSVY files now includes commenting the yaml header by default. Import of CSVY accommodates this automatically. (#74) # rio 0.3.3 * Export of CSVY files and metadata now supported by `export()`. 
(#73) * Import of CSVY files now stores dataset-level metadata in attributes of the output data.frame. (#73, h/t Tom Aldenberg) * When rio receives an unrecognized file format, it now issues a message. The new internal `.import.default()` and `.export.default()` then produce an error. This enables add-on packages to support additional formats through new s3 methods of the form `.import.rio_EXTENSION()` and `.export.rio_EXTENSION()`. # rio 0.3.2 * Use S3 dispatch internally to call new (unexported) `.import()` and `.export()` methods. (#42, h/t Jason Becker) # rio 0.3.0 * Release to CRAN. * Set a default numerical precision (of 2 decimal places) for export to fixed-width format. # rio 0.2.13 * Import stats package for `na.omit()`. # rio 0.2.11 * Added support for direct import from Google Sheets. (#60, #63, h/t Chung-hong Chan) # rio 0.2.7 * Refactored remote file retrieval into separate (non-exported) function used by `import()`. (#62) * Added test sutie to test file conversion. * Expanded test suite to include test of all export formats. # rio 0.2.6 * Cleaned up NAMESPACE file. # rio 0.2.5 * If file format for a remote file cannot be identified from the supplied URL or the final URL reported by `curl::curl_fetch_memory()`, the HTTP headers are checked for a filename in the Content-Disposition header. (#36) * Removed longurl dependency. This is no longer needed because we can identify formats using curl's url argument. * Fixed a bug related to importing European-style ("csv2") format files. (#44) * Updated CSVY import to embed variable-level metadata. (#52) * Use `urltools::url_parse()` to extract file extensions from complex URLs (e.g., those with query arguments). (#56) * Fixed NAMESPACE notes for base packages. (#58) # rio 0.2.4 * Modified behavior so that files imported using haven now store variable metadata at the data.frame level by default (unlike the default behavior in haven, which can cause problems). 
(#37, h/t Ista Zahn) * Added support for importing CSVY (http://csvy.org/) formatted files. (#52) * Added import dependency on data.table 1.9.5. (#39) # rio 0.2.2 * Uses the longurl package to expand shortened URLs so that their file type can be easily determined. # rio 0.2.1 * Improved support for importing from compressed directories, especially web-based compressed directories. (#38) * Add import dependency on curl >= 0.6 to facilitate content type parsing and format inference from URL redirects. (#36) * Add bit64 to `Suggests` to remove an `import` warning. # rio 0.2 * `import` always returns a data.frame, unless `setclass` is specified. (#22) * Added support for import from legacy Excel (.xls) files `readxl::read_excel`, making its use optional. (#19) * Added support for import from and export to the system clipboard on Windows and Mac OS. * Added support for export to simple XML documents. (#12) * Added support for import from simple XML documents via `XML::xmlToDataFrame`. (#12) * Added support for import from ODS spreadsheet formats. (#12, h/t Chung-hong Chan) * Use `data.table::fread` by default for reading delimited files. (#3) * Added support for import and export of `dput` and `dget` objects. (#10) * Added support for reading from compressed archives (.zip and .tar). (#7) * Added support for writing to fixed-width format. (#8) * Set `stringsAsFactors = FALSE` as default for reading tabular data. (#4) * Added support for HTTPS imports. (#1, h/t Christopher Gandrud) * Added support for automatic file naming in `export` based on object name and file format. (#5) * Exposed `convert` function. * Added vignette, knitr-generated README.md, and updated documentation. (#2) * Added some non-exported functions to simplify argument passing and streamline package API. (#6) * Separated `import`, `export`, `convert`, and utilities into separate source code files. 
* Expanded the set of supported file types/extensions, switched SPSS, SAS, and Stata formats to **haven**, making its use optional. # rio 0.1.2 * Updated documentation and fixed a bug in csv import without header. # rio 0.1.1 * Initial release rio/MD50000644000176200001440000001424214146644522011366 0ustar liggesuserscefd51503fdfa11f5c0c4c8f98131d51 *DESCRIPTION 1ca770e9e72baa9abb6101b1f9b20d3c *NAMESPACE fb3f37b36a6bc7cb30e0825c3c4d1c77 *NEWS.md 03cb7c5ee291ebafe03c40af1e001566 *R/arg_reconcile.R 8a769ed104119b83ebe296d1b5105919 *R/characterize.R d42c68d5d3da2d2e7b4e7f06bc02cf1f *R/compression.R 99d7ccd631e105ccb8dab5ea5d22087e *R/convert.R e0b29a8c50ddc72cecb01111b0f4e79d *R/convert_google_url.R 166a5e3e3ba30602ebcb41bd412f1825 *R/export.R ac132a0e5835a19e44f118806ad5a505 *R/export_list.R 10cf4b3e3e686d6899eb5e378a0c025c *R/export_methods.R 47f0485fc71471f180be0de5da6909cb *R/extensions.R 8057434a66ed35e12c1b1207b3adfded *R/fwf2.R e454bccc143e28c8c52a8f792c752452 *R/gather_attrs.R 17d6d0bd632ede9ee917ace6eadfaca7 *R/import.R 679a59cd931a596aa258f25842936911 *R/import_list.R a57e070d443abe0a6b999e5006da71a7 *R/import_methods.R 5bc6e72505be059bb09f309001117d28 *R/is_file_text.R 29c4accc12632e2d3c4b29e744308557 *R/onLoad.R fc76fbc9642538df62dc1c962d9d78bb *R/remote_to_local.R 7dc4370b66cd467a040f636b80d3c296 *R/rio.R ddbb7c668d70897889488d7e6c72911c *R/set_class.R f6d34ae2bd9ad7ea351b3c0890628f59 *R/standardize_attributes.R f0fc2bf7d08ecf609f325e98ddde0654 *R/suggestions.R 4d2c30b16fb570954f762f9e1de73f11 *R/utils.R 1d1d053296e2589722a3814eed6b8a03 *README.md 0997f38b27432dc79b41920614e923fd *build/vignette.rds 14c89f2f803b3e165ef08455992f1b13 *inst/CITATION 312bfb0bb544755ecb9473e7939f7125 *inst/doc/rio.R 48d8be4a2c08a530e9979fe5ed213cbc *inst/doc/rio.html b2fa397f650d1f270403bfa623197bc5 *inst/examples/example-DESCRIPTION ac82e5ea6d23f3b50f69d07b5f5afb77 *inst/examples/example.csvy 408c5b85e5327e84302fe436e90c412e *inst/examples/iris.xls 
d939406722129d3bed0bb2c7f9ffd715 *inst/examples/mtcars.ods da76d4babe6ce26b1390804777acc096 *inst/examples/noheader.csv 9c69992fa38075582524952ee3f192ae *inst/examples/twotables.html 867bfae807220fe0dc7d829d57322622 *man/arg_reconcile.Rd 489c841a5ade53323e6705c717b4a90b *man/characterize.Rd be748f8c687ec8331ed1aa46213e7de9 *man/convert.Rd fa523803218282a7351a2bc71def1e80 *man/export.Rd aabd66be8ec21f6523da7f422a01ce92 *man/export_list.Rd fbca00be52e92c84d571f09bf2f568c4 *man/extensions.Rd 28364a60eb6da8d72c52b45e814b5630 *man/figures/logo.png 6573f6f672b316a40147eb208e5ab14b *man/figures/logo.svg 61badc696101e9a7acaa7c86e7f79820 *man/gather_attrs.Rd 0472dd7c2129067898bd04c2205e9443 *man/get_ext.Rd f70cc800a370652cf8c1322e77a79399 *man/import.Rd e1ec62d2c943090ff666b02edaff74a7 *man/import_list.Rd 2b9d3cc658a700f09e4e976cb616d1e5 *man/install_formats.Rd 9ca55ac17a8c518ec3f3545968fb7f4d *man/is_file_text.Rd 31a2731b942c7ab5a315d3bd462a0cbf *man/rio.Rd ec6e7d824997372b50731b9f66a4f7ca *po/R-rio.pot dc67e448382de1d8b4c3d9084d2545a6 *tests/test-all.R a399853ea201b0862d45ab8a36693858 *tests/testthat/files/br-in-header.html 8e2e5162a0cf7cef927e00d0909cf2b5 *tests/testthat/files/br-in-td.html b57996b057553b53116579db4bf2aaca *tests/testthat/files/th-as-row-element.html eb14abb46a5bc7d2da7b5789dd0e348e *tests/testthat/files/two-tbody.html b0e4665f1aad553ab5109614d00fbbbc *tests/testthat/test_arg_reconcile.R df569c8379883bdd8ff9280a4cd1adbe *tests/testthat/test_characterize.R 9f506d8180d5ee9cac7c753f620d1b08 *tests/testthat/test_compress.R dd06da79f16540ceebc2317886a0953d *tests/testthat/test_convert.R b709d557771fb39a95c39c19ea37537f *tests/testthat/test_errors.R 7983d944658b7b5951b8a21c61cbff85 *tests/testthat/test_export_list.R 1fe8973ce74bd56eed9f177e0e6873f4 *tests/testthat/test_extensions.R e6726e596e02c2934f921d67fb87718b *tests/testthat/test_format_R.R 8bbca02a695deb56ac769120af97e989 *tests/testthat/test_format_arff.R d28d8ce2eb3127fd45250e5a36be7723 
*tests/testthat/test_format_csv.R e298ca2a83849c7863e172f07173d85b *tests/testthat/test_format_csv_gz.R 0d03799f35da12c798afbe70d85a1464 *tests/testthat/test_format_csvy.R bd08b72d10ee6ad0f3227aeb2ea833b3 *tests/testthat/test_format_dbf.R 316cbcce32fefe11b940cf781961855f *tests/testthat/test_format_dif.R 4ff6e9769f5adf85b0888afeb9a701e7 *tests/testthat/test_format_dta.R cba0940d6a7d56a1ba27495d3acba159 *tests/testthat/test_format_eviews.R ad5733822bae70b2ffc54b7754721aab *tests/testthat/test_format_external_packages.R 7317772a78c20c9e0ed963c73b0d3c73 *tests/testthat/test_format_feather.R dcb7aff5cb80023d9b2d75fa6d9f03f9 *tests/testthat/test_format_fortran.R 60006258a5b44bb0601f4507a6247cd4 *tests/testthat/test_format_fst.R 8ec60cd8af8c548489934f1b093ec3d5 *tests/testthat/test_format_fwf.R ff512b55f73ea2dc236300019db13577 *tests/testthat/test_format_html.R eb7ab2a0a0853acf7af7a128e98982d9 *tests/testthat/test_format_json.R 5d7dbab7c2e04531aff99b7296e9e508 *tests/testthat/test_format_matlab.R 8bc0b500ddcf9ddb2b737243b15a8305 *tests/testthat/test_format_mtp.R a8fb526dc96bede46a4c7f314426315a *tests/testthat/test_format_ods.R 45452b8cc65f12a38d7a4dd801f6be8d *tests/testthat/test_format_parquet.R 8d027edfd91ada229f10cbd0c38fda41 *tests/testthat/test_format_psv.R 98ba5da136fab7fd16484e557962de92 *tests/testthat/test_format_pzfx.R c5774b99ab39366bd27420ebf5a35bc7 *tests/testthat/test_format_rdata.R 682898757c7537a045e44ed849195b05 *tests/testthat/test_format_rds.R 3ff37c4f82eced227594b3b10fda9e11 *tests/testthat/test_format_rec.R 1b5720103c134e7a1456f0a2b3064883 *tests/testthat/test_format_sas.R 1f8b5ff081114de1c19a6846a4304e5a *tests/testthat/test_format_sav.R 58c3269715b40b380a5dcde343d0d968 *tests/testthat/test_format_syd.R 17ffb3cb0ea32cffbfc2d17012edd496 *tests/testthat/test_format_tsv.R fa9e89ebefe5d619ddec0e67d7c9a408 *tests/testthat/test_format_xls.R df4ff9152490a2fd031b9f7e358dbc4e *tests/testthat/test_format_xml.R 13af5012d917280db266d6b5ca7e84c2 
*tests/testthat/test_format_yml.R 3367fc887b1d0c6385d3d654276d3703 *tests/testthat/test_gather_attrs.R 0e9f665dd8e4b7e121c4a1e0030f6d8d *tests/testthat/test_guess.R d2a519f3951428ba58c477427f411cc0 *tests/testthat/test_identical.R bfe7f59b3f9d5d3fa2cefcf9f97fe5a2 *tests/testthat/test_import_list.R 3f4f7c8c008af5f8223efa516d224a74 *tests/testthat/test_install_formats.R 1909f2b5a221d576961bdfaf9a8a71ac *tests/testthat/test_is_file_text.R 2aa8667e47a80911af08bfcb4fde6e6e *tests/testthat/test_matrix.R 006d58c8582d1804f25c85f7c16e6bf5 *tests/testthat/test_remote.R 72df21c812b9396938fdd5918a9cf03a *tests/testthat/test_set_class.R 901116e6cad2a9f918cebb1c2c04d9de *vignettes/rio.Rmd rio/inst/0000755000176200001440000000000014142223745012024 5ustar liggesusersrio/inst/examples/0000755000176200001440000000000013541441740013641 5ustar liggesusersrio/inst/examples/example-DESCRIPTION0000644000176200001440000000434013324627613017005 0ustar liggesusersPackage: rio Type: Package Title: A Swiss-Army Knife for Data I/O Version: 0.5.12 Date: 2018-07-06 Authors@R: c(person("Jason", "Becker", role = "ctb", email = "jason@jbecker.co"), person("Chung-hong", "Chan", role = "aut", email = "chainsawtiney@gmail.com"), person("Geoffrey CH", "Chan", role = "ctb", email = "gefchchan@gmail.com"), person("Thomas J.", "Leeper", role = c("aut", "cre"), email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326")), person("Christopher", "Gandrud", role = "ctb"), person("Andrew", "MacDonald", role = "ctb"), person("Ista", "Zahn", role = "ctb"), person("Stanislaus", "Stadlmann", role = "ctb"), person("Ruaridh", "Williamson", role = "ctb", email = "ruaridh.williamson@gmail.com"), person("Patrick", "Kennedy", role = "ctb"), person("Ryan", "Price", email = "ryapric@gmail.com", role = "ctb")) Description: Streamlined data import and export by making assumptions that the user is probably willing to make: 'import()' and 'export()' determine the data structure from the file extension, 
reasonable defaults are used for data import and export (e.g., 'stringsAsFactors=FALSE'), web-based import is natively supported (including from SSL/HTTPS), compressed files can be read directly without explicit decompression, and fast import packages are used where appropriate. An additional convenience function, 'convert()', provides a simple method for converting between file types. URL: https://github.com/leeper/rio BugReports: https://github.com/leeper/rio/issues Depends: R (>= 2.15.0) Imports: tools, stats, utils, foreign, haven (>= 1.1.0), curl (>= 0.6), data.table (>= 1.9.8), readxl (>= 0.1.1), openxlsx, tibble Suggests: datasets, bit64, testthat, knitr, magrittr, clipr, csvy, feather, fst, jsonlite, NANTUCKET, readODS (>= 1.6.4), readr, rmatio, xml2 (>= 1.2.0), yaml License: GPL-2 VignetteBuilder: knitr RoxygenNote: 6.0.1 rio/inst/examples/twotables.html0000644000176200001440000003671613234300275016544 0ustar liggesusers R Exported Data
mpgcyldisphpdratwtqsecvsamgearcarb
2161601103.92.6216.460144
2161601103.92.87517.020144
22.84108933.852.3218.611141
21.462581103.083.21519.441031
18.783601753.153.4417.020032
18.162251052.763.4620.221031
14.383602453.213.5715.840034
24.44146.7623.693.19201042
22.84140.8953.923.1522.91042
19.26167.61233.923.4418.31044
17.86167.61233.923.4418.91044
16.48275.81803.074.0717.40033
17.38275.81803.073.7317.60033
15.28275.81803.073.78180033
10.484722052.935.2517.980034
10.4846021535.42417.820034
14.784402303.235.34517.420034
32.4478.7664.082.219.471141
30.4475.7524.931.61518.521142
33.9471.1654.221.83519.91141
21.54120.1973.72.46520.011031
15.583181502.763.5216.870032
15.283041503.153.43517.30032
13.383502453.733.8415.410034
19.284001753.083.84517.050032
27.3479664.081.93518.91141
264120.3914.432.1416.70152
30.4495.11133.771.51316.91152
15.883512644.223.1714.50154
19.761451753.622.7715.50156
1583013353.543.5714.60158
21.441211094.112.7818.61142

Sepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
5.13.51.40.2setosa
4.931.40.2setosa
4.73.21.30.2setosa
4.63.11.50.2setosa
53.61.40.2setosa
5.43.91.70.4setosa
4.63.41.40.3setosa
53.41.50.2setosa
4.42.91.40.2setosa
4.93.11.50.1setosa
5.43.71.50.2setosa
4.83.41.60.2setosa
4.831.40.1setosa
4.331.10.1setosa
5.841.20.2setosa
5.74.41.50.4setosa
5.43.91.30.4setosa
5.13.51.40.3setosa
5.73.81.70.3setosa
5.13.81.50.3setosa
5.43.41.70.2setosa
5.13.71.50.4setosa
4.63.610.2setosa
5.13.31.70.5setosa
4.83.41.90.2setosa
531.60.2setosa
53.41.60.4setosa
5.23.51.50.2setosa
5.23.41.40.2setosa
4.73.21.60.2setosa
4.83.11.60.2setosa
5.43.41.50.4setosa
5.24.11.50.1setosa
5.54.21.40.2setosa
4.93.11.50.2setosa
53.21.20.2setosa
5.53.51.30.2setosa
4.93.61.40.1setosa
4.431.30.2setosa
5.13.41.50.2setosa
53.51.30.3setosa
4.52.31.30.3setosa
4.43.21.30.2setosa
53.51.60.6setosa
5.13.81.90.4setosa
4.831.40.3setosa
5.13.81.60.2setosa
4.63.21.40.2setosa
5.33.71.50.2setosa
53.31.40.2setosa
73.24.71.4versicolor
6.43.24.51.5versicolor
6.93.14.91.5versicolor
5.52.341.3versicolor
6.52.84.61.5versicolor
5.72.84.51.3versicolor
6.33.34.71.6versicolor
4.92.43.31versicolor
6.62.94.61.3versicolor
5.22.73.91.4versicolor
523.51versicolor
5.934.21.5versicolor
62.241versicolor
6.12.94.71.4versicolor
5.62.93.61.3versicolor
6.73.14.41.4versicolor
5.634.51.5versicolor
5.82.74.11versicolor
6.22.24.51.5versicolor
5.62.53.91.1versicolor
5.93.24.81.8versicolor
6.12.841.3versicolor
6.32.54.91.5versicolor
6.12.84.71.2versicolor
6.42.94.31.3versicolor
6.634.41.4versicolor
6.82.84.81.4versicolor
6.7351.7versicolor
62.94.51.5versicolor
5.72.63.51versicolor
5.52.43.81.1versicolor
5.52.43.71versicolor
5.82.73.91.2versicolor
62.75.11.6versicolor
5.434.51.5versicolor
63.44.51.6versicolor
6.73.14.71.5versicolor
6.32.34.41.3versicolor
5.634.11.3versicolor
5.52.541.3versicolor
5.52.64.41.2versicolor
6.134.61.4versicolor
5.82.641.2versicolor
52.33.31versicolor
5.62.74.21.3versicolor
5.734.21.2versicolor
5.72.94.21.3versicolor
6.22.94.31.3versicolor
5.12.531.1versicolor
5.72.84.11.3versicolor
6.33.362.5virginica
5.82.75.11.9virginica
7.135.92.1virginica
6.32.95.61.8virginica
6.535.82.2virginica
7.636.62.1virginica
4.92.54.51.7virginica
7.32.96.31.8virginica
6.72.55.81.8virginica
7.23.66.12.5virginica
6.53.25.12virginica
6.42.75.31.9virginica
6.835.52.1virginica
5.72.552virginica
5.82.85.12.4virginica
6.43.25.32.3virginica
6.535.51.8virginica
7.73.86.72.2virginica
7.72.66.92.3virginica
62.251.5virginica
6.93.25.72.3virginica
5.62.84.92virginica
7.72.86.72virginica
6.32.74.91.8virginica
6.73.35.72.1virginica
7.23.261.8virginica
6.22.84.81.8virginica
6.134.91.8virginica
6.42.85.62.1virginica
7.235.81.6virginica
7.42.86.11.9virginica
7.93.86.42virginica
6.42.85.62.2virginica
6.32.85.11.5virginica
6.12.65.61.4virginica
7.736.12.3virginica
6.33.45.62.4virginica
6.43.15.51.8virginica
634.81.8virginica
6.93.15.42.1virginica
6.73.15.62.4virginica
6.93.15.12.3virginica
5.82.75.11.9virginica
6.83.25.92.3virginica
6.73.35.72.5virginica
6.735.22.3virginica
6.32.551.9virginica
6.535.22virginica
6.23.45.42.3virginica
5.935.11.8virginica
rio/inst/examples/mtcars.ods0000644000176200001440000001204512755335461015653 0ustar liggesusersPK !l9..mimetypeapplication/vnd.oasis.opendocument.spreadsheetPK!o0ToO styles.xmlVn0? z%eAb!J-zd&)Tl;ܴ4N[s͛O=OިL %T0UJ\ Z3Gٶвd06gNu. 9JkK{ bRʁS.K |'.s6:;wq|fȥ]%F[/,zfZYC8"V {怄} 5̃9m;r|Ym %H(+X fi- |wW;@sfZ+ xp#_(1:!#zSs tZ5;Ct m/j_rqPK!e content.xml]Msݧ*^g~&b'ߧ[-]t+0z/>COtiN~ؼXeIj?MzEoqmSߞl\%vȒnJ/6%@߷)fe#5JjCcWmN+[<,<. բ|eMOk=; n>'?|W" 3ܾ֮^-W82ʩ;]4N(_?א ëT\c|>%q6IH0V}R0Ym`OCП< Ǐ< O"Bef4¿`w.z\9t\|:̓M N7n+,QY&tmƘEڏf7Zg96yn7x3Ǣ1Y&Z͆ :a0ZE0o4;UCbHv/r冰Q.<ٸHom8LY}H: x2Q~`:^ VL=f/.˼> ;ӥ_}x([xa H32u&d-D.@Qhq(ٟ@DŽh($W\4CИ·A)2FW0pF_N<~S[qu.+ MЏM*UM0|JT= 3^u&ؙɆ2Lr`ir!ƪ G]ENPNp:,,a0j\%7QPO8>mdل3 KĘ-q|(/ vc6\S #EE'Yȴ ԁrj _(T(PĽ02s/qPaƪ11 C9@,dќ^CU(P&b<5ge/kd]fr:"!0nlF]j Fr}ADkOBY o}i;_lŇO/}AM0tɪb|*o:jBټQ#.d4Mӊ`lBafPwQ1.9 re`6C٩tE>EmFD^+|zb9%f)>eBa:N v㲝4.v8ɄW*jD\AI>z+d+٠3h]6M'0V}TˏƠ-O7T#٠tEM0G_) 3׿(L: #9vPLe *,襄 hFvka#OsXT7eB[=p4**"#31 "FqD@\̩5\茼w&ݛ3u&NϏ˳Ħ6A70/ ]u0RD#PhVZa"M.03,-[;֝b-Bv*aZNr=&͋0?${|b ^68ɋ;>3 rSKY\Afav+^aspӑC NQ i^&N w_Z:t_iaZ l20@Ҳor`nO-NGM@|3μ_ވ܅^7@/nm\S |Iu_tLvU{]=Im~B;nmnEjm<[}o%ݔ^mJ?PK!META-INF/manifest.xmlAj1 Ex?v+&z4[6#M>NavQH;I}y8N8sμhjL P\ ^a|~4! Iķe&W;$ڐb KAs];C_Wܴ<4VcAAk9~=Q4_w6#D>"Y֌lz_Ԃg?@C%G3H{ܭ/PK!Evmeta.xmlR;o0+? 
`L jUC6˾V٦` "Qw>؞k}u$Ehn gZ0e4Vw9$* oj>3ӎPaN:Y zN $s6 No_uϝk/ѧ'*1Ǡa=J mۤ]E㝳2TMӫ"p ycWɭqBD44~`7.!,YZ\j%S1F_F-RNXz h77CdlvdA9]57Chw -TMoI PK- !l9..mimetypePK-!o0ToO Tstyles.xmlPK-!e content.xmlPK-!yMETA-INF/manifest.xmlPK-!Evmeta.xmlPK rio/inst/examples/example.csvy0000644000176200001440000000047712767773204016226 0ustar liggesusers--- name: my-dataset fields: - name: var1 title: variable 1 type: string description: explaining var1 constraints: - required: true - name: var2 title: variable 2 type: integer - name: var3 title: variable 3 type: number --- var1,var2,var3 A,1,2.5 B,3,4.3 rio/inst/examples/iris.xls0000644000176200001440000005200013541441740015334 0ustar liggesusersࡱ; '%  !"#$(Root Entry  \pCalc Ba==@ 8@"1Calibri1Arial1Arial1Arial General                + ) , *    `DSheet1;4Sheet2TbZ 3  @@   Sepal.Length Sepal.Width Petal.Length Petal.WidthSpeciessetosa versicolor virginicampgcyldisphpdratwtqsecvsamgearcarbcc   dMbP?_%*+$!&C&"Times New Roman,Regular"&12&A)&&C&"Times New Roman,Regular"&12Page &P&333333?'333333?(-؂-?)-؂-?"d,,333333?333333?U           ffffff@{3S @3S [ S ffffff@[S 3S s433333?~  ffffff@S3433333? S[S  @ 3S  @ [+  s[S  SS  3+ ?~ +  S ~ @[ s  ffffff@{3433333? 433333?433333? ffffff@[433333? sS433333?~ S ffffff@[ ffffff@S ffffff@~ +433333?~  SS S S #{[S #S3S [S S  !"#$%&'()*+,-./0123456789:;<=>? sS[ ~ !#!ffffff@![+ !"3S "#@#[S #$S $%{ S %&@&3+ &'@' S '(ffffff@(S[S (){ )433333? )~ * *ffffff@~ * *433333? *+@+ S +,{,433333? ,-ffffff@- -.3.433333? ./ffffff@/S /0ffffff@03S 01K[S 12+3S 23[3 34  [ 45 5@~ 5[ 5~ 66ffffff@6  67+ c7ffffff@~ 7[ 78c   89 +[ 9:@:+ :;S ;ffffff@~ ;  ;<#;3 <= { =>; [ >~ ??@? ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_@ [3 @A  AB{ B@~ B3 BC [ CD ;Dffffff@~ D D~ E E@E [ EFF? FG;  GH c  HI I@~ I[ IJ c[ JK   KLS L@~ L3 LM c3 MN{ N433333? NO [ OP{ PQQ? QR RS ; ST;Tffffff@~ T TUs [ UVS  VW{ [[ W~ X Xffffff@X@~ X  XYYffffff@~ Y  YZ  Z[[@~ [ [\ \ffffff@~ \3 \]  ]~ ^^ffffff@^+ ^_;  _`abcdefghijklmnopqrstuvwxyz{|}~` `a  ab   bcffffff@cc? 
cdcdffffff@~ d  de + ef ;fffffff@~ f fg ; K gh  hi+  i@ ij S K jk@k k433333? klk   lm{   mnC   no+ offffff@~ o op ;K pq K qr  rs csffffff@~ s st Ktffffff@ tu+  uv { v@ vw  wffffff@ w~ xx@x[ xy yffffff@ yzcz@~ z z{ c{   {| ;|@~ | |}{ +K }~C  ~ c  @~   cK C    c  [     c@  cffffff@~ [  3   ffffff@  S     sK {   ffffff@ffffff@  ;ffffff@~   ; ffffff@ { + { #ffffff@   + #   Ssffffff@ ; ffffff@~  PH0(  >@gg   dMbP?_%,*+&ffffff?'ffffff?(333333?)333333?"d,,333333?333333?U} !  , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,           HV $V@$ $#v(\@$ $s! Q @q= ףp3@ H;"c 2@BSk H[" #&fffffVb@6R  # a@6 ~## H #c * #c fffff2@   ffffff0@$ "_ ffffff1@  *  " 1@  H"J HC"b67 $C"2^V-@$ $"zGa@Qk1@ $2z c@$o $/Kvףp= ?$ 4fffffQ@\(\?fffff3@ $!Q@(\4@ H;"ZS_ $"Z{Gz @$  H"z $"B(\@$ $*> c(\?fffff2@ Hjn[ $/h|?5?fffff0@ H"~" HFW; H>">"  , s! q= ףp@~ [ 2@  PH 0(  >@gg  FMicrosoft Excel 97-TabelleBiff8Oh+'0@H T ` l x TJL1@;9@@r@"m՜.+,D՜.+,\Root EntryF&WorkbookDCompObjIOle SummaryInformation(DocumentSummaryInformation8trio/inst/examples/noheader.csv0000644000176200001440000000070712642247627016160 0ustar liggesusers0,??,?,Total,1998,46991171,23594034,23397137 0,??,0 - 4?,0 ,1998,3390678,1788561,1602117 0,??,5 - 9?,5 ,1998,3428387,1820224,1608163 0,??,10 - 14?,10,1998,3195174,1668531,1526643 0,??,15 - 19?,15,1998,4094035,2102515,1991520 0,??,20 - 24?,20,1998,3942827,2022535,1920292 0,??,25 - 29?,25,1998,4637577,2371635,2265942 0,??,30 - 34?,30,1998,4375695,2239107,2136588 0,??,35 - 39?,35,1998,4502137,2308132,2194005 0,??,40 - 44?,40,1998,3754895,1924704,1830191 rio/inst/doc/0000755000176200001440000000000014142223747012573 5ustar liggesusersrio/inst/doc/rio.html0000644000176200001440000407666014142223745014273 0ustar liggesusers Import, Export, and Convert Data Files

Import, Export, and Convert Data Files

The idea behind rio is to simplify the process of importing data into R and exporting data from R. This process is, probably unnecessarily, extremely complex for beginning R users. Indeed, R supplies an entire manual describing the process of data import/export. And, despite all of that text, most of the packages described are (to varying degrees) out-of-date. Faster, simpler, packages with fewer dependencies have been created for many of the file types described in that document. rio aims to unify data I/O (importing and exporting) into two simple functions: import() and export() so that beginners (and experienced R users) never have to think twice (or even once) about the best way to read and write R data.

The core advantage of rio is that it makes assumptions that the user is probably willing to make. Specifically, rio uses the file extension of a file name to determine what kind of file it is. This is the same logic used by Windows OS, for example, in determining what application is associated with a given file type. By taking away the need to manually match a file type (which a beginner may not recognize) to a particular import or export function, rio allows almost all common data formats to be read with the same function.

By making import and export easy, it’s an obvious next step to also use R as a simple data conversion utility. Transferring data files between various proprietary formats is always a pain and often expensive. The convert function therefore combines import and export to easily convert between file formats (thus providing a FOSS replacement for programs like Stat/Transfer or Sledgehammer).

Supported file formats

rio supports a variety of different file formats for import and export. To keep the package slim, all non-essential formats are supported via “Suggests” packages, which are not installed (or loaded) by default. To ensure rio is fully functional, install these packages the first time you use rio via:

install_formats()

The full list of supported formats is below:

Format Typical Extension Import Package Export Package Installed by Default
Comma-separated data .csv data.table data.table Yes
Pipe-separated data .psv data.table data.table Yes
Tab-separated data .tsv data.table data.table Yes
CSVY (CSV + YAML metadata header) .csvy data.table data.table Yes
SAS .sas7bdat haven haven Yes
SPSS .sav haven haven Yes
SPSS (compressed) .zsav haven haven Yes
Stata .dta haven haven Yes
SAS XPORT .xpt haven haven Yes
SPSS Portable .por haven Yes
Excel .xls readxl Yes
Excel .xlsx readxl openxlsx Yes
R syntax .R base base Yes
Saved R objects .RData, .rda base base Yes
Serialized R objects .rds base base Yes
Epiinfo .rec foreign Yes
Minitab .mtp foreign Yes
Systat .syd foreign Yes
“XBASE” database files .dbf foreign foreign Yes
Weka Attribute-Relation File Format .arff foreign foreign Yes
Data Interchange Format .dif utils Yes
Fortran data no recognized extension utils Yes
Fixed-width format data .fwf utils utils Yes
gzip comma-separated data .csv.gz utils utils Yes
Apache Arrow (Parquet) .parquet arrow arrow No
EViews .wf1 hexView No
Feather R/Python interchange format .feather feather feather No
Fast Storage .fst fst fst No
JSON .json jsonlite jsonlite No
Matlab .mat rmatio rmatio No
OpenDocument Spreadsheet .ods readODS readODS No
HTML Tables .html xml2 xml2 No
Shallow XML documents .xml xml2 xml2 No
YAML .yml yaml yaml No
Clipboard default is tsv clipr clipr No
Google Sheets as Comma-separated data

Additionally, any format that is not supported by rio but that has a known R implementation will produce an informative error message pointing to a package and import or export function. Unrecognized formats will yield a simple “Unrecognized file format” error.

Data Import

rio allows you to import files in almost any format using one, typically single-argument, function. import() infers the file format from the file’s extension and calls the appropriate data import function for you, returning a simple data.frame. This works for any for the formats listed above.

library("rio")

x <- import("mtcars.csv")
y <- import("mtcars.rds")
z <- import("mtcars.dta")

# confirm identical
all.equal(x, y, check.attributes = FALSE)
## [1] TRUE
all.equal(x, z, check.attributes = FALSE)
## [1] TRUE

If for some reason a file does not have an extension, or has a file extension that does not match its actual type, you can manually specify a file format to override the format inference step. For example, we can read in a CSV file that does not have a file extension by specifying csv:

head(import("mtcars_noext", format = "csv"))
##    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Importing Data Lists

Sometimes you may have multiple data files that you want to import. import() only ever returns a single data frame, but import_list() can be used to import a vector of file names into R. This works even if the files are different formats:

str(import_list(dir()), 1)

Similarly, some single-file formats (e.g. Excel Workbooks, Zip directories, HTML files, etc.) can contain multiple data sets. Because import() is type safe, always returning a data frame, importing from these formats requires specifying a which argument to import() to dictate which data set (worksheet, file, table, etc.) to import (the default being which = 1). But import_list() can be used to import all (or only a specified subset, again via which) of data objects from these types of files.

Data Export

The export capabilities of rio are somewhat more limited than the import capabilities, given the availability of different functions in various R packages and because import functions are often written to make use of data from other applications and it never seems to be a development priority to have functions to export to the formats used by other applications. That said, rio currently supports the following formats:

library("rio")

export(mtcars, "mtcars.csv")
export(mtcars, "mtcars.rds")
export(mtcars, "mtcars.dta")

It is also easy to use export() as part of an R pipeline (from magrittr or dplyr). For example, the following code uses export() to save the results of a simple data transformation:

library("magrittr")
mtcars %>% subset(hp > 100) %>%  aggregate(. ~ cyl + am, data = ., FUN = mean) %>% export(file = "mtcars2.dta")

Some file formats (e.g., Excel workbooks, Rdata files) can support multiple data objects in a single file. export() natively supports output of multiple objects to these types of files:

# export to sheets of an Excel workbook
export(list(mtcars = mtcars, iris = iris), "multi.xlsx")
# export to an .Rdata file
## as a named list
export(list(mtcars = mtcars, iris = iris), "multi.rdata")

## as a character vector
export(c("mtcars", "iris"), "multi.rdata")

It is also possible to use the new (as of v0.6.0) function export_list() to write a list of data frames to multiple files using either a vector of file names or a file pattern:

export_list(list(mtcars = mtcars, iris = iris), "%s.tsv")

File Conversion

The convert() function links import() and export() by constructing a dataframe from the imported file and immediately writing it back to disk. convert() invisibly returns the file name of the exported file, so that it can be used to programmatically access the new file.

Because convert() is just a thin wrapper for import() and export(), it is very easy to use. For example, we can convert a Stata file to an SPSS file in a single call:

# create file to convert
export(mtcars, "mtcars.dta")

# convert Stata to SPSS
convert("mtcars.dta", "mtcars.sav")

convert() also accepts lists of arguments for controlling import (in_opts) and export (out_opts). This can be useful for passing additional arguments to import or export methods. This could be useful, for example, for reading in a fixed-width format file and converting it to a comma-separated values file:

# create an ambiguous file
fwf <- tempfile(fileext = ".fwf")
cat(file = fwf, "123456", "987654", sep = "\n")

# see two ways to read in the file
identical(import(fwf, widths = c(1,2,3)), import(fwf, widths = c(1,-2,3)))
## [1] FALSE
# convert to CSV
convert(fwf, "fwf.csv", in_opts = list(widths = c(1,2,3)))
import("fwf.csv") # check conversion
##   V1 V2  V3
## 1  1 23 456
## 2  9 87 654

With metadata-rich file formats (e.g., Stata, SPSS, SAS), it can also be useful to pass imported data through characterize() or factorize() when converting to an open, text-delimited format: characterize() converts a single variable or all variables in a data frame that have “labels” attributes into character vectors based on the mapping of values to value labels (e.g., export(characterize(import("file.dta")), "file.csv")). An alternative approach is exporting to CSVY format, which records metadata in a YAML-formatted header at the beginning of a CSV file.

It is also possible to use rio on the command-line by calling Rscript with the -e (expression) argument. For example, to convert a file from Stata (.dta) to comma-separated values (.csv), simply do the following:

Rscript -e "rio::convert('mtcars.dta', 'mtcars.csv')"

Package Philosophy

The core advantage of rio is that it makes assumptions that the user is probably willing to make. Eight of these are important:

  1. rio uses the file extension of a file name to determine what kind of file it is. This is the same logic used by Windows OS, for example, in determining what application is associated with a given file type. By removing the need to manually match a file type (which a beginner may not recognize) to a particular import or export function, rio allows almost all common data formats to be read with the same function. And if a file extension is incorrect, users can force a particular import method by specifying the format argument. Other packages do this as well, but rio aims to be more complete and more consistent than each:
  • reader handles certain text formats and R binary files
  • io offers a set of custom formats
  • ImportExport focuses on select binary formats (Excel, SPSS, and Access files) and provides a Shiny interface.
  • SchemaOnRead iterates through a large number of possible import methods until one works successfully
  2. rio uses data.table::fread() for text-delimited files to automatically determine the file format regardless of the extension. So, a CSV that is actually tab-separated will still be correctly imported. It’s also crazy fast.

  3. rio, wherever possible, does not import character strings as factors.

  4. rio supports web-based imports natively, including from SSL (HTTPS) URLs, from shortened URLs, from URLs that lack proper extensions, and from (public) Google Documents Spreadsheets.

  5. rio imports from single-file .zip and .tar archives automatically, without the need to explicitly decompress them. Export to compressed directories is also supported.

  6. rio wraps a variety of faster, more stream-lined I/O packages than those provided by base R or the foreign package. It uses data.table for delimited formats, haven for SAS, Stata, and SPSS files, smarter and faster fixed-width file import and export routines, and readxl and openxlsx for reading and writing Excel workbooks.

  7. rio stores metadata from rich file formats (SPSS, Stata, etc.) in variable-level attributes in a consistent form regardless of file type or underlying import function. These attributes are identified as:

    • label: a description of the variable
    • labels: a vector mapping numeric values to the character strings that those values represent
    • format: a character string describing the variable storage type in the original file

    The gather_attrs() function makes it easy to move variable-level attributes to the data frame level (and spread_attrs() reverses that gathering process). These can be useful, especially, during file conversion to more easily modify attributes that are handled differently across file formats. As an example, the following idiom can be used to trim SPSS value labels to the 32-character maximum allowed by Stata:

    dat <- gather_attrs(rio::import("data.sav"))
    attr(dat, "labels") <- lapply(attributes(dat)$labels, function(x) {
        if (!is.null(x)) {
            names(x) <- substring(names(x), 1, 32)
        }
        x
    })
    export(spread_attrs(dat), "data.dta")

    In addition, two functions (added in v0.5.5) provide easy ways to create character and factor variables from these “labels” attributes. characterize() converts a single variable or all variables in a data frame that have “labels” attributes into character vectors based on the mapping of values to value labels. factorize() does the same but returns factor variables. This can be especially helpful for converting these rich file formats into open formats (e.g., export(characterize(import("file.dta")), "file.csv")).

  8. rio imports and exports files based on an internal S3 class infrastructure. This means that other packages can contain extensions to rio by registering S3 methods. These methods should take the form .import.rio_X() and .export.rio_X(), where X is the file extension of a file type. An example is provided in the rio.db package.

rio/inst/doc/rio.R0000644000176200001440000000553714142223744013516 0ustar liggesusers## ---- echo=FALSE, results='hide'---------------------------------------------- library("rio") export(mtcars, "mtcars.csv") export(mtcars, "mtcars.rds") export(mtcars, "mtcars.dta") export(mtcars, "mtcars_noext", format = "csv") ## ----------------------------------------------------------------------------- library("rio") x <- import("mtcars.csv") y <- import("mtcars.rds") z <- import("mtcars.dta") # confirm identical all.equal(x, y, check.attributes = FALSE) all.equal(x, z, check.attributes = FALSE) ## ----------------------------------------------------------------------------- head(import("mtcars_noext", format = "csv")) ## ---- echo=FALSE, results='hide'---------------------------------------------- unlink("mtcars.csv") unlink("mtcars.rds") unlink("mtcars.dta") unlink("mtcars_noext") ## ----------------------------------------------------------------------------- library("rio") export(mtcars, "mtcars.csv") export(mtcars, "mtcars.rds") export(mtcars, "mtcars.dta") ## ----------------------------------------------------------------------------- library("magrittr") mtcars %>% subset(hp > 100) %>% aggregate(. 
~ cyl + am, data = ., FUN = mean) %>% export(file = "mtcars2.dta") ## ----------------------------------------------------------------------------- # export to sheets of an Excel workbook export(list(mtcars = mtcars, iris = iris), "multi.xlsx") ## ----------------------------------------------------------------------------- # export to an .Rdata file ## as a named list export(list(mtcars = mtcars, iris = iris), "multi.rdata") ## as a character vector export(c("mtcars", "iris"), "multi.rdata") ## ----------------------------------------------------------------------------- export_list(list(mtcars = mtcars, iris = iris), "%s.tsv") ## ----------------------------------------------------------------------------- # create file to convert export(mtcars, "mtcars.dta") # convert Stata to SPSS convert("mtcars.dta", "mtcars.sav") ## ----------------------------------------------------------------------------- # create an ambiguous file fwf <- tempfile(fileext = ".fwf") cat(file = fwf, "123456", "987654", sep = "\n") # see two ways to read in the file identical(import(fwf, widths = c(1,2,3)), import(fwf, widths = c(1,-2,3))) # convert to CSV convert(fwf, "fwf.csv", in_opts = list(widths = c(1,2,3))) import("fwf.csv") # check conversion ## ---- echo=FALSE, results='hide'---------------------------------------------- unlink("mtcars.dta") unlink("mtcars.sav") unlink("fwf.csv") unlink(fwf) ## ---- echo=FALSE, results='hide'---------------------------------------------- unlink("mtcars.csv") unlink("mtcars.rds") unlink("mtcars.rdata") unlink("mtcars.dta") unlink("multi.xlsx") unlink("multi.rdata") unlink("mtcars2.dta") unlink("mtcars.tsv") unlink("iris.tsv") rio/inst/CITATION0000644000176200001440000000133112657050412013155 0ustar liggesuserscitHeader("To cite package 'rio' in publications use:") year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE) vers <- paste("R package version", meta$Version) citEntry(entry="Manual", title = "rio: A Swiss-army knife for data 
file I/O", author = personList(as.person("Chung-hong Chan"), as.person("Geoffrey CH Chan"), as.person("Thomas J. Leeper"), as.person("Jason Becker")), year = year, note = vers, textVersion = paste("Chung-hong Chan, Geoffrey CH Chan, Thomas J. Leeper, and Jason Becker (", year, "). rio: A Swiss-army knife for data file I/O. ", vers, ".", sep="")) rio/po/0000755000176200001440000000000013577230343011470 5ustar liggesusersrio/po/R-rio.pot0000644000176200001440000000664014063055403013203 0ustar liggesusersmsgid "" msgstr "" "Project-Id-Version: rio 0.5.27\n" "POT-Creation-Date: 2021-06-18 09:36\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=CHARSET\n" "Content-Transfer-Encoding: 8bit\n" msgid "The following arguments were ignored for" msgstr "" msgid ":" msgstr "" msgid "," msgstr "" msgid "condition" msgstr "" msgid "File compression failed for %s!" msgstr "" msgid "Zip archive contains multiple files. Attempting first file." msgstr "" msgid "Tar archive contains multiple files. Attempting first file." msgstr "" msgid "'outfile' is missing with no default" msgstr "" msgid "Must specify 'file' and/or 'format'" msgstr "" msgid "'x' is not a data.frame or matrix" msgstr "" msgid "'x' must be a list. Perhaps you want export()?" msgstr "" msgid "'file' must be a character vector" msgstr "" msgid "'file' must have a %s placehold" msgstr "" msgid "All elements of 'x' must be named or all must be unnamed" msgstr "" msgid "Names of elements in 'x' are not unique" msgstr "" msgid "'file' must be same length as 'x', or a single pattern with a %s placeholder" msgstr "" msgid "File names are not unique" msgstr "" msgid "Export failed for element %d, filename: %s" msgstr "" msgid "data.table::fwrite() does not support writing to connections. Using utils::write.table() instead." 
msgstr "" msgid "Columns:" msgstr "" msgid "'x' must be a data.frame, list, or environment" msgstr "" msgid "%s format not supported. Consider using the '%s()' function" msgstr "" msgid "Import support for the %s format is exported by the %s package. Run 'library(%s)' then try again." msgstr "" msgid "Format not supported" msgstr "" msgid "'x' is not a data.frame" msgstr "" msgid "No such file" msgstr "" msgid "'data.table = TRUE' argument overruled. Using setclass = '%s'" msgstr "" msgid "Import failed for %s" msgstr "" msgid "Import failed for %s from %s" msgstr "" msgid "Attempt to rbindlist() the data did not succeed. List returned instead." msgstr "" msgid "data.table::fread() does not support reading from connections. Using utils::read.table() instead." msgstr "" msgid "Ambiguous file format ('.dat'), but attempting 'data.table::fread(\"%s\")'" msgstr "" msgid "Import of fixed-width format data requires a 'widths' argument. See ? read.fwf()." msgstr "" msgid "File imported using load. Arguments to '...' ignored." msgstr "" msgid "Dump file contains multiple objects. Returning first object." msgstr "" msgid "File imported using readRDS. Arguments to '...' ignored." msgstr "" msgid "Rdata file contains multiple objects. Returning first object." msgstr "" msgid "Import of Fortran format data requires a 'style' argument. See ? utils::read.fortran()." msgstr "" msgid "The following arguments were ignored for read_ods:" msgstr "" msgid "Requested table exceeds number of tables found in file (" msgstr "" msgid ")!" msgstr "" msgid "Unrecognized file format. Try specifying with the format argument." msgstr "" msgid "'file' is not a string" msgstr "" msgid "'file' has no extension" msgstr "" msgid "last record incomplete, %d line discarded" msgid_plural "last record incomplete, %d lines discarded" msgstr[0] "" msgstr[1] ""