rio/ 0000755 0001762 0000144 00000000000 14146644522 011053 5 ustar ligges users rio/NAMESPACE 0000644 0001762 0000144 00000007714 14135741523 012300 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(.export,default)
S3method(.export,rio_arff)
S3method(.export,rio_clipboard)
S3method(.export,rio_csv)
S3method(.export,rio_csv2)
S3method(.export,rio_csvy)
S3method(.export,rio_dbf)
S3method(.export,rio_dta)
S3method(.export,rio_dump)
S3method(.export,rio_feather)
S3method(.export,rio_fst)
S3method(.export,rio_fwf)
S3method(.export,rio_html)
S3method(.export,rio_json)
S3method(.export,rio_matlab)
S3method(.export,rio_ods)
S3method(.export,rio_parquet)
S3method(.export,rio_psv)
S3method(.export,rio_pzfx)
S3method(.export,rio_r)
S3method(.export,rio_rda)
S3method(.export,rio_rdata)
S3method(.export,rio_rds)
S3method(.export,rio_sas7bdat)
S3method(.export,rio_sav)
S3method(.export,rio_tsv)
S3method(.export,rio_txt)
S3method(.export,rio_xlsx)
S3method(.export,rio_xml)
S3method(.export,rio_xpt)
S3method(.export,rio_yml)
S3method(.export,rio_zsav)
S3method(.import,default)
S3method(.import,rio_arff)
S3method(.import,rio_clipboard)
S3method(.import,rio_csv)
S3method(.import,rio_csv2)
S3method(.import,rio_csvy)
S3method(.import,rio_dat)
S3method(.import,rio_dbf)
S3method(.import,rio_dif)
S3method(.import,rio_dta)
S3method(.import,rio_dump)
S3method(.import,rio_eviews)
S3method(.import,rio_feather)
S3method(.import,rio_fortran)
S3method(.import,rio_fst)
S3method(.import,rio_fwf)
S3method(.import,rio_html)
S3method(.import,rio_json)
S3method(.import,rio_matlab)
S3method(.import,rio_mtp)
S3method(.import,rio_ods)
S3method(.import,rio_parquet)
S3method(.import,rio_psv)
S3method(.import,rio_pzfx)
S3method(.import,rio_r)
S3method(.import,rio_rda)
S3method(.import,rio_rdata)
S3method(.import,rio_rds)
S3method(.import,rio_rec)
S3method(.import,rio_sas7bdat)
S3method(.import,rio_sav)
S3method(.import,rio_spss)
S3method(.import,rio_syd)
S3method(.import,rio_tsv)
S3method(.import,rio_txt)
S3method(.import,rio_xls)
S3method(.import,rio_xlsx)
S3method(.import,rio_xml)
S3method(.import,rio_xpt)
S3method(.import,rio_yml)
S3method(.import,rio_zsav)
S3method(characterize,data.frame)
S3method(characterize,default)
S3method(factorize,data.frame)
S3method(factorize,default)
export(.export)
export(.import)
export(characterize)
export(convert)
export(export)
export(export_list)
export(factorize)
export(gather_attrs)
export(get_ext)
export(import)
export(import_list)
export(install_formats)
export(is_file_text)
export(spread_attrs)
importFrom(curl,curl_fetch_memory)
importFrom(curl,parse_headers)
importFrom(data.table,as.data.table)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(data.table,is.data.table)
importFrom(foreign,read.arff)
importFrom(foreign,read.dbf)
importFrom(foreign,read.dta)
importFrom(foreign,read.epiinfo)
importFrom(foreign,read.mtp)
importFrom(foreign,read.spss)
importFrom(foreign,read.systat)
importFrom(foreign,read.xport)
importFrom(foreign,write.arff)
importFrom(foreign,write.dbf)
importFrom(haven,labelled)
importFrom(haven,read_dta)
importFrom(haven,read_por)
importFrom(haven,read_sas)
importFrom(haven,read_sav)
importFrom(haven,read_xpt)
importFrom(haven,write_dta)
importFrom(haven,write_sas)
importFrom(haven,write_sav)
importFrom(haven,write_xpt)
importFrom(openxlsx,read.xlsx)
importFrom(openxlsx,write.xlsx)
importFrom(readxl,read_xls)
importFrom(readxl,read_xlsx)
importFrom(stats,na.omit)
importFrom(stats,setNames)
importFrom(tibble,as_tibble)
importFrom(tibble,is_tibble)
importFrom(tools,file_ext)
importFrom(tools,file_path_sans_ext)
importFrom(utils,capture.output)
importFrom(utils,install.packages)
importFrom(utils,installed.packages)
importFrom(utils,packageName)
importFrom(utils,read.DIF)
importFrom(utils,read.fortran)
importFrom(utils,read.fwf)
importFrom(utils,read.table)
importFrom(utils,tar)
importFrom(utils,type.convert)
importFrom(utils,untar)
importFrom(utils,unzip)
importFrom(utils,write.csv)
importFrom(utils,write.table)
importFrom(utils,zip)
rio/README.md 0000644 0001762 0000144 00000051351 14050203722 012322 0 ustar ligges users
# rio: A Swiss-Army Knife for Data I/O
[](https://cran.r-project.org/package=rio)
 [](https://travis-ci.org/leeper/rio)
[](https://ci.appveyor.com/project/leeper/rio)
[](https://codecov.io/github/leeper/rio?branch=master)
## Overview
The aim of **rio** is to make data file I/O in R as easy as possible by
implementing four simple functions in Swiss-army knife style:
- `import()` provides a painless data import experience by
automatically choosing the appropriate import/read function based on
file extension (or a specified `format` argument)
- `import_list()` imports a list of data frames from a multi-object
file (Excel workbook, .Rdata files, zip directory, or HTML file)
- `export()` provides the same painless file recognition for data
export/write functionality
- `convert()` wraps `import()` and `export()` to allow the user to
easily convert between file formats (thus providing a FOSS
replacement for programs like
[Stat/Transfer](https://stattransfer.com/) or
[Sledgehammer](https://www.mtna.us/#/products/sledgehammer)).
Relatedly, [Luca Braglia](https://lbraglia.github.io/) has created a
Shiny app called [rioweb](https://github.com/lbraglia/rioweb) that
provides access to the file conversion features of rio.
[GREA](https://github.com/Stan125/GREA/) is an RStudio add-in that
provides an interactive interface for reading in data using rio.
## Installation
The package is available on
[CRAN](https://cran.r-project.org/package=rio) and can be installed
directly in R using `install.packages()`. You may want to run
`install_formats()` after the first installation.
``` r
install.packages("rio")
install_formats()
```
The latest development version on GitHub can be installed using:
``` r
if (!require("remotes")){
install.packages("remotes")
}
remotes::install_github("leeper/rio")
```
## Usage
Because **rio** is meant to streamline data I/O, the package is
extremely easy to use. Here are some examples of reading, writing, and
converting data files.
### Export
Exporting data is handled with one function, `export()`:
``` r
library("rio")
export(mtcars, "mtcars.csv") # comma-separated values
export(mtcars, "mtcars.rds") # R serialized
export(mtcars, "mtcars.sav") # SPSS
```
A particularly useful feature of rio is the ability to import from and
export to compressed (e.g., zip) directories, saving users the extra
step of compressing a large exported file, e.g.:
``` r
export(mtcars, "mtcars.tsv.zip")
```
As of rio v0.5.0, `export()` can also write multiple data frames to
respective sheets of an Excel workbook or an HTML file:
``` r
export(list(mtcars = mtcars, iris = iris), file = "mtcars.xlsx")
```
### Import
Importing data is handled with one function, `import()`:
``` r
x <- import("mtcars.csv")
y <- import("mtcars.rds")
z <- import("mtcars.sav")
# confirm data match
all.equal(x, y, check.attributes = FALSE)
```
## [1] TRUE
``` r
all.equal(x, z, check.attributes = FALSE)
```
## [1] TRUE
Note: Because of inconsistencies across underlying packages, the
data.frame returned by `import` might vary slightly (in variable classes
and attributes) depending on file type.
In rio v0.5.0, a new list-based import function was added. This allows
users to import a list of data frames from a multi-object file (such as
an Excel workbook, .Rdata file, zip directory, or HTML file):
``` r
str(m <- import_list("mtcars.xlsx"))
```
## List of 2
## $ mtcars:'data.frame': 32 obs. of 11 variables:
## ..$ mpg : num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## ..$ cyl : num [1:32] 6 6 4 6 8 6 8 4 4 6 ...
## ..$ disp: num [1:32] 160 160 108 258 360 ...
## ..$ hp : num [1:32] 110 110 93 110 175 105 245 62 95 123 ...
## ..$ drat: num [1:32] 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## ..$ qsec: num [1:32] 16.5 17 18.6 19.4 17 ...
## ..$ vs : num [1:32] 0 0 1 1 0 1 0 1 1 1 ...
## ..$ am : num [1:32] 1 1 1 0 0 0 0 0 0 0 ...
## ..$ gear: num [1:32] 4 4 4 3 3 3 3 4 4 4 ...
## ..$ carb: num [1:32] 4 4 1 1 2 1 4 2 2 4 ...
## $ iris :'data.frame': 150 obs. of 5 variables:
## ..$ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## ..$ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## ..$ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## ..$ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## ..$ Species : chr [1:150] "setosa" "setosa" "setosa" "setosa" ...
And for rio v0.6.0, a new list-based export function was added. This
makes it easy to export a list of (possibly named) data frames to
multiple files:
``` r
export_list(m, "%s.tsv")
c("mtcars.tsv", "iris.tsv") %in% dir()
```
## [1] TRUE TRUE
### Convert
The `convert()` function links `import()` and `export()` by constructing
a data frame from the imported file and immediately writing it back to
disk. `convert()` invisibly returns the file name of the exported file,
so that it can be used to programmatically access the new file.
``` r
convert("mtcars.sav", "mtcars.dta")
```
It is also possible to use **rio** on the command-line by calling
`Rscript` with the `-e` (expression) argument. For example, to convert a
file from Stata (.dta) to comma-separated values (.csv), simply do the
following:
Rscript -e "rio::convert('iris.dta', 'iris.csv')"
## Supported file formats
**rio** supports a wide range of file formats. To keep the package slim,
all non-essential formats are supported via “Suggests” packages, which
are not installed (or loaded) by default. To ensure rio is fully
functional, install these packages the first time you use **rio** via:
``` r
install_formats()
```
The full list of supported formats is below:
| Format | Typical Extension | Import Package | Export Package | Installed by Default |
| ----------------------------------------------------- | ----------------------- | --------------------------------------------------------------- | --------------------------------------------------------------- | -------------------- |
| Comma-separated data | .csv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes |
| Pipe-separated data | .psv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes |
| Tab-separated data | .tsv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes |
| CSVY (CSV + YAML metadata header) | .csvy | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes |
| SAS | .sas7bdat | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes |
| SPSS | .sav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes |
| SPSS (compressed) | .zsav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes |
| Stata | .dta | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes |
| SAS XPORT | .xpt | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes |
| SPSS Portable | .por | [**haven**](https://cran.r-project.org/package=haven) | | Yes |
| Excel | .xls | [**readxl**](https://cran.r-project.org/package=readxl) | | Yes |
| Excel | .xlsx | [**readxl**](https://cran.r-project.org/package=readxl) | [**openxlsx**](https://cran.r-project.org/package=openxlsx) | Yes |
| R syntax | .R | **base** | **base** | Yes |
| Saved R objects | .RData, .rda | **base** | **base** | Yes |
| Serialized R objects | .rds | **base** | **base** | Yes |
| Epiinfo | .rec | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes |
| Minitab | .mtp | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes |
| Systat | .syd | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes |
| “XBASE” database files | .dbf | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes |
| Weka Attribute-Relation File Format | .arff | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes |
| Data Interchange Format | .dif | **utils** | | Yes |
| Fortran data | no recognized extension | **utils** | | Yes |
| Fixed-width format data | .fwf | **utils** | **utils** | Yes |
| gzip comma-separated data | .csv.gz | **utils** | **utils** | Yes |
| Apache Arrow (Parquet) | .parquet | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No |
| EViews | .wf1 | [**hexView**](https://cran.r-project.org/package=hexView) | | No |
| Feather R/Python interchange format | .feather | [**feather**](https://cran.r-project.org/package=feather) | [**feather**](https://cran.r-project.org/package=feather) | No |
| Fast Storage | .fst | [**fst**](https://cran.r-project.org/package=fst) | [**fst**](https://cran.r-project.org/package=fst) | No |
| JSON | .json | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | No |
| Matlab | .mat | [**rmatio**](https://cran.r-project.org/package=rmatio) | [**rmatio**](https://cran.r-project.org/package=rmatio) | No |
| OpenDocument Spreadsheet | .ods | [**readODS**](https://cran.r-project.org/package=readODS) | [**readODS**](https://cran.r-project.org/package=readODS) | No |
| HTML Tables | .html | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No |
| Shallow XML documents | .xml | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No |
| YAML | .yml | [**yaml**](https://cran.r-project.org/package=yaml) | [**yaml**](https://cran.r-project.org/package=yaml) | No |
| Clipboard | default is tsv | [**clipr**](https://cran.r-project.org/package=clipr) | [**clipr**](https://cran.r-project.org/package=clipr) | No |
| [Google Sheets](https://www.google.com/sheets/about/) | as Comma-separated data | | | |
| Graphpad Prism | .pzfx | [**pzfx**](https://cran.r-project.org/package=pzfx) | [**pzfx**](https://cran.r-project.org/package=pzfx) | No |
Additionally, any format that is not supported by **rio** but that has a
known R implementation will produce an informative error message
pointing to a package and import or export function. Unrecognized
formats will yield a simple “Unrecognized file format” error.
## Package Philosophy
The core advantage of **rio** is that it makes assumptions that the user
is probably willing to make. Eight of these are important:
1. **rio** uses the file extension of a file name to determine what
kind of file it is. This is the same logic used by Windows OS, for
example, in determining what application is associated with a given
file type. By removing the need to manually match a file type (which
a beginner may not recognize) to a particular import or export
function, **rio** allows almost all common data formats to be read
with the same function. And if a file extension is incorrect, users
can force a particular import method by specifying the `format`
argument. Other packages do this as well, but **rio** aims to be
more complete and more consistent than each:
- [**reader**](https://cran.r-project.org/package=reader) handles
certain text formats and R binary files
- [**io**](https://cran.r-project.org/package=io) offers a set of
custom formats
- [**ImportExport**](https://cran.r-project.org/package=ImportExport)
focuses on select binary formats (Excel, SPSS, and Access files) and
provides a Shiny interface.
- [**SchemaOnRead**](https://cran.r-project.org/package=SchemaOnRead)
iterates through a large number of possible import methods until one
works successfully
2. **rio** uses `data.table::fread()` for text-delimited files to
automatically determine the file format regardless of the extension.
So, a CSV that is actually tab-separated will still be correctly
imported. It’s also crazy fast.
3. **rio**, wherever possible, does not import character strings as
factors.
4. **rio** supports web-based imports natively, including from SSL
(HTTPS) URLs, from shortened URLs, from URLs that lack proper
extensions, and from (public) Google Documents Spreadsheets.
5. **rio** imports from single-file .zip and .tar archives
automatically, without the need to explicitly decompress them.
Export to compressed directories is also supported.
6. **rio** wraps a variety of faster, more stream-lined I/O packages
than those provided by base R or the **foreign** package. It uses
[**data.table**](https://cran.r-project.org/package=data.table) for
delimited formats,
[**haven**](https://cran.r-project.org/package=haven) for SAS,
Stata, and SPSS files, smarter and faster fixed-width file import
and export routines, and
[**readxl**](https://cran.r-project.org/package=readxl) and
[**openxlsx**](https://cran.r-project.org/package=openxlsx) for
reading and writing Excel workbooks.
7. **rio** stores metadata from rich file formats (SPSS, Stata, etc.)
in variable-level attributes in a consistent form regardless of file
type or underlying import function. These attributes are identified
as:
- `label`: a description of variable
- `labels`: a vector mapping numeric values to character strings
those values represent
- `format`: a character string describing the variable storage
type in the original file
The `gather_attrs()` function makes it easy to move variable-level
attributes to the data frame level (and `spread_attrs()` reverses
that gathering process). These can be useful, especially, during
file conversion to more easily modify attributes that are handled
differently across file formats. As an example, the following idiom
can be used to trim SPSS value labels to the 32-character maximum
allowed by Stata:
``` r
dat <- gather_attrs(rio::import("data.sav"))
attr(dat, "labels") <- lapply(attributes(dat)$labels, function(x) {
if (!is.null(x)) {
names(x) <- substring(names(x), 1, 32)
}
x
})
export(spread_attrs(dat), "data.dta")
```
In addition, two functions (added in v0.5.5) provide easy ways to
create character and factor variables from these “labels”
attributes. `characterize()` converts a single variable or all
variables in a data frame that have “labels” attributes into
character vectors based on the mapping of values to value labels.
`factorize()` does the same but returns factor variables. This can
be especially helpful for converting these rich file formats into
open formats (e.g., `export(characterize(import("file.dta")),
"file.csv")`).
8. **rio** imports and exports files based on an internal S3 class
infrastructure. This means that other packages can contain
extensions to **rio** by registering S3 methods. These methods
should take the form `.import.rio_X()` and `.export.rio_X()`, where
`X` is the file extension of a file type. An example is provided in
the [rio.db package](https://github.com/leeper/rio.db).
rio/man/ 0000755 0001762 0000144 00000000000 14014451034 011612 5 ustar ligges users rio/man/import.Rd 0000644 0001762 0000144 00000022101 14135736506 013425 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/import.R
\name{import}
\alias{import}
\title{Import}
\usage{
import(file, format, setclass, which, ...)
}
\arguments{
\item{file}{A character string naming a file, URL, or single-file .zip or .tar archive.}
\item{format}{An optional character string code of file format, which can be used to override the format inferred from \code{file}. Shortcuts include: \dQuote{,} (for comma-separated values), \dQuote{;} (for semicolon-separated values), and \dQuote{|} (for pipe-separated values).}
\item{setclass}{An optional character vector specifying one or more classes to set on the import. By default, the return object is always a \dQuote{data.frame}. Allowed values include \dQuote{tbl_df}, \dQuote{tbl}, or \dQuote{tibble} (if using dplyr) or \dQuote{data.table} (if using data.table). Other values are ignored, such that a data.frame is returned.}
\item{which}{This argument is used to control import from multi-object files; as a rule \code{import} only ever returns a single data frame (use \code{\link{import_list}} to import multiple data frames from a multi-object file). If \code{file} is a compressed directory, \code{which} can be either a character string specifying a filename or an integer specifying which file (in locale sort order) to extract from the compressed directory. For Excel spreadsheets, this can be used to specify a sheet name or number. For .Rdata files, this can be an object name. For HTML files, it identifies which table to extract (from document order). Ignored otherwise. A character string value will be used as a regular expression, such that the extracted file is the first match of the regular expression against the file names in the archive.}
\item{\dots}{Additional arguments passed to the underlying import functions. For example, this can control column classes for delimited file types, or control the use of haven for Stata and SPSS or readxl for Excel (.xlsx) format. See details below.}
}
\value{
A data frame. If \code{setclass} is used, this data frame may have additional class attribute values, such as \dQuote{tibble} or \dQuote{data.table}.
}
\description{
Read in a data.frame from a file. Exceptions to this rule are Rdata, RDS, and JSON input file formats, which return the originally saved object without changing its class.
}
\details{
This function imports a data frame or matrix from a data file with the file format based on the file extension (or the manually specified format, if \code{format} is specified).
\code{import} supports the following file formats:
\itemize{
\item Comma-separated data (.csv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE}
\item Pipe-separated data (.psv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{sep = '|'}, \code{row.names = FALSE} and \code{stringsAsFactors = FALSE}
\item Tab-separated data (.tsv), using \code{\link[data.table]{fread}} or, if \code{fread = FALSE}, \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE}
\item SAS (.sas7bdat), using \code{\link[haven]{read_sas}}.
\item SAS XPORT (.xpt), using \code{\link[haven]{read_xpt}} or, if \code{haven = FALSE}, \code{\link[foreign]{read.xport}}.
\item SPSS (.sav), using \code{\link[haven]{read_sav}}. If \code{haven = FALSE}, \code{\link[foreign]{read.spss}} can be used.
\item SPSS compressed (.zsav), using \code{\link[haven]{read_sav}}.
\item Stata (.dta), using \code{\link[haven]{read_dta}}. If \code{haven = FALSE}, \code{\link[foreign]{read.dta}} can be used.
\item SPSS Portable Files (.por), using \code{\link[haven]{read_por}}.
\item Excel (.xls and .xlsx), using \code{\link[readxl]{read_excel}}. Use \code{which} to specify a sheet number. For .xlsx files, it is possible to set \code{readxl = FALSE}, so that \code{\link[openxlsx]{read.xlsx}} can be used instead of readxl (the default).
\item R syntax object (.R), using \code{\link[base]{dget}}
\item Saved R objects (.RData,.rda), using \code{\link[base]{load}} for single-object .Rdata files. Use \code{which} to specify an object name for multi-object .Rdata files. This can be any R object (not just a data frame).
\item Serialized R objects (.rds), using \code{\link[base]{readRDS}}. This can be any R object (not just a data frame).
\item Epiinfo (.rec), using \code{\link[foreign]{read.epiinfo}}
\item Minitab (.mtp), using \code{\link[foreign]{read.mtp}}
\item Systat (.syd), using \code{\link[foreign]{read.systat}}
\item "XBASE" database files (.dbf), using \code{\link[foreign]{read.dbf}}
\item Weka Attribute-Relation File Format (.arff), using \code{\link[foreign]{read.arff}}
\item Data Interchange Format (.dif), using \code{\link[utils]{read.DIF}}
\item Fortran data (no recognized extension), using \code{\link[utils]{read.fortran}}
\item Fixed-width format data (.fwf), using a faster version of \code{\link[utils]{read.fwf}} that requires a \code{widths} argument and by default in rio has \code{stringsAsFactors = FALSE}. If \code{readr = TRUE}, import will be performed using \code{\link[readr]{read_fwf}}, where \code{widths} should be: \code{NULL}, a vector of column widths, or the output of \code{\link[readr]{fwf_empty}}, \code{\link[readr]{fwf_widths}}, or \code{\link[readr]{fwf_positions}}.
\item gzip comma-separated data (.csv.gz), using \code{\link[utils]{read.table}} with \code{row.names = FALSE} and \code{stringsAsFactors = FALSE}
\item \href{https://github.com/csvy}{CSVY} (CSV with a YAML metadata header) using \code{\link[data.table]{fread}}.
\item Apache Arrow Parquet (.parquet), using \code{\link[arrow]{read_parquet}}
\item Feather R/Python interchange format (.feather), using \code{\link[feather]{read_feather}}
\item Fast storage (.fst), using \code{\link[fst]{read.fst}}
\item JSON (.json), using \code{\link[jsonlite]{fromJSON}}
\item Matlab (.mat), using \code{\link[rmatio]{read.mat}}
\item EViews (.wf1), using \code{\link[hexView]{readEViews}}
\item OpenDocument Spreadsheet (.ods), using \code{\link[readODS]{read_ods}}. Use \code{which} to specify a sheet number.
\item Single-table HTML documents (.html), using \code{\link[xml2]{read_html}}. The data structure will only be read correctly if the HTML file can be converted to a list via \code{\link[xml2]{as_list}}.
\item Shallow XML documents (.xml), using \code{\link[xml2]{read_xml}}. The data structure will only be read correctly if the XML file can be converted to a list via \code{\link[xml2]{as_list}}.
\item YAML (.yml), using \code{\link[yaml]{yaml.load}}
\item Clipboard import (on Windows and Mac OS), using \code{\link[utils]{read.table}} with \code{row.names = FALSE}
\item Google Sheets, as Comma-separated data (.csv)
\item GraphPad Prism (.pzfx) using \code{\link[pzfx]{read_pzfx}}
}
\code{import} attempts to standardize the return value from the various import functions to the extent possible, thus providing a uniform data structure regardless of what import package or function is used. It achieves this by storing any optional variable-related attributes at the variable level (i.e., an attribute for \code{mtcars$mpg} is stored in \code{attributes(mtcars$mpg)} rather than \code{attributes(mtcars)}). If you would prefer these attributes to be stored at the data.frame-level (i.e., in \code{attributes(mtcars)}), see \code{\link{gather_attrs}}.
After importing metadata-rich file formats (e.g., from Stata or SPSS), it may be helpful to recode labelled variables to character or factor using \code{\link{characterize}} or \code{\link{factorize}} respectively.
}
\note{
For csv and txt files with row names exported from \code{\link{export}}, it may be helpful to specify \code{row.names} as the column of the table which contain row names. See example below.
}
\examples{
# create CSV to import
export(iris, csv_file <- tempfile(fileext = ".csv"))
# specify `format` to override default format
export(iris, tsv_file <- tempfile(fileext = ".tsv"), format = "csv")
stopifnot(identical(import(csv_file), import(tsv_file, format = "csv")))
# import CSV as a `data.table`
stopifnot(inherits(import(csv_file, setclass = "data.table"), "data.table"))
# pass arguments to underlying import function
iris1 <- import(csv_file)
identical(names(iris), names(iris1))
export(iris, csv_file2 <- tempfile(fileext = ".csv"), col.names = FALSE)
iris2 <- import(csv_file2)
identical(names(iris), names(iris2))
# set class for the response data.frame as "tbl_df" (from dplyr)
stopifnot(inherits(import(csv_file, setclass = "tbl_df"), "tbl_df"))
# non-data frame formats supported for RDS, Rdata, and JSON
export(list(mtcars, iris), rds_file <- tempfile(fileext = ".rds"))
li <- import(rds_file)
identical(names(mtcars), names(li[[1]]))
# cleanup
unlink(csv_file)
unlink(csv_file2)
unlink(tsv_file)
unlink(rds_file)
}
\seealso{
\code{\link{import_list}}, \code{\link{.import}}, \code{\link{characterize}}, \code{\link{gather_attrs}}, \code{\link{export}}, \code{\link{convert}}
}
rio/man/rio.Rd 0000644 0001762 0000144 00000003236 14135737451 012714 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rio.R
\docType{package}
\name{rio}
\alias{rio}
\title{A Swiss-Army Knife for Data I/O}
\description{
The aim of rio is to make data file input and output as easy as possible. \code{\link{export}} and \code{\link{import}} serve as a Swiss-army knife for painless data I/O for data from almost any file format by inferring the data structure from the file extension, natively reading web-based data sources, setting reasonable defaults for import and export, and relying on efficient data import and export packages. An additional convenience function, \code{\link{convert}}, provides a simple method for converting between file types.
Note that some of rio's functionality is provided by \sQuote{Suggests} dependencies, meaning they are not installed by default. Use \code{\link{install_formats}} to make sure these packages are available for use.
}
\examples{
# export
library("datasets")
export(mtcars, csv_file <- tempfile(fileext = ".csv")) # comma-separated values
export(mtcars, rds_file <- tempfile(fileext = ".rds")) # R serialized
export(mtcars, sav_file <- tempfile(fileext = ".sav")) # SPSS
# import
x <- import(csv_file)
y <- import(rds_file)
z <- import(sav_file)
# convert sav (SPSS) to dta (Stata)
convert(sav_file, dta_file <- tempfile(fileext = ".dta"))
# cleanup
unlink(c(csv_file, rds_file, sav_file, dta_file))
}
\references{
\href{https://github.com/Stan125/GREA}{GREA} provides an RStudio add-in to import data using rio.
}
\seealso{
\code{\link{import}}, \code{\link{import_list}}, \code{\link{export}}, \code{\link{export_list}}, \code{\link{convert}}, \code{\link{install_formats}}
}
rio/man/import_list.Rd 0000644 0001762 0000144 00000005502 14135741523 014461 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/import_list.R
\name{import_list}
\alias{import_list}
\title{Import list of data frames}
\usage{
import_list(
file,
setclass,
which,
rbind = FALSE,
rbind_label = "_file",
rbind_fill = TRUE,
...
)
}
\arguments{
\item{file}{A character string containing a single file name for a multi-object file (e.g., Excel workbook, zip directory, or HTML file), or a vector of file paths for multiple files to be imported.}
\item{setclass}{An optional character vector specifying one or more classes to set on the import. By default, the return object is always a \dQuote{data.frame}. Allowed values include \dQuote{tbl_df}, \dQuote{tbl}, or \dQuote{tibble} (if using dplyr) or \dQuote{data.table} (if using data.table). Other values are ignored, such that a data.frame is returned.}
\item{which}{If \code{file} is a single file path, this specifies which objects should be extracted (passed to \code{\link{import}}'s \code{which} argument). Ignored otherwise.}
\item{rbind}{A logical indicating whether to pass the import list of data frames through \code{\link[data.table]{rbindlist}}.}
\item{rbind_label}{If \code{rbind = TRUE}, a character string specifying the name of a column to add to the data frame indicating its source file.}
\item{rbind_fill}{If \code{rbind = TRUE}, a logical indicating whether to set the \code{fill = TRUE} (and fill missing columns with \code{NA}).}
\item{\dots}{Additional arguments passed to \code{\link{import}}. Behavior may be unexpected if files are of different formats.}
}
\value{
If \code{rbind = FALSE} (the default), a list of data frames. Otherwise, that list is passed to \code{\link[data.table]{rbindlist}} with \code{fill = TRUE} and returns a data frame object of class set by the \code{setclass} argument; if this operation fails, the list is returned.
}
\description{
Use \code{\link{import}} to import a list of data frames from a vector of file names or from a multi-object file (Excel workbook, .Rdata file, zip directory, or HTML file)
}
\examples{
library('datasets')
export(list(mtcars1 = mtcars[1:10,],
mtcars2 = mtcars[11:20,],
mtcars3 = mtcars[21:32,]),
xlsx_file <- tempfile(fileext = ".xlsx")
)
# import a single file from multi-object workbook
str(import(xlsx_file, which = "mtcars1"))
# import all worksheets
str(import_list(xlsx_file), 1)
# import and rbind all worksheets
mtcars2 <- import_list(xlsx_file, rbind = TRUE)
all.equal(mtcars2[,-12], mtcars, check.attributes = FALSE)
# import multiple files
wd <- getwd()
setwd(tempdir())
export(mtcars, "mtcars1.csv")
export(mtcars, "mtcars2.csv")
str(import_list(dir(pattern = "csv$")), 1)
unlink(c("mtcars1.csv", "mtcars2.csv"))
setwd(wd)
# cleanup
unlink(xlsx_file)
}
\seealso{
\code{\link{import}}, \code{\link{export_list}}, \code{\link{export}}
}
rio/man/install_formats.Rd 0000644 0001762 0000144 00000001175 13600364062 015312 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/suggestions.R
\name{install_formats}
\alias{install_formats}
\title{Install rio's \sQuote{Suggests} Dependencies}
\usage{
install_formats(...)
}
\arguments{
\item{\dots}{Additional arguments passed to \code{\link[utils]{install.packages}}.}
}
\value{
\code{NULL}
}
\description{
This function installs various \sQuote{Suggests} dependencies for rio that expand its support to the full range of supported import and export formats. These packages are not installed or loaded by default in order to create a slimmer and faster package build, install, and load.
}
rio/man/is_file_text.Rd 0000644 0001762 0000144 00000001675 14135741404 014577 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/is_file_text.R
\name{is_file_text}
\alias{is_file_text}
\title{Determine whether a file is \dQuote{plain-text} or some sort of binary format}
\usage{
is_file_text(file, maxsize = Inf, text_bytes = as.raw(c(7:16, 18, 19, 32:255)))
}
\arguments{
\item{file}{Path to the file}
\item{maxsize}{Maximum number of bytes to read}
\item{text_bytes}{Which characters are used by normal text (though not
necessarily just ASCII). To detect just ASCII, the
following value can be used:
\code{as.raw(c(7:16, 18, 19, 32:127))}}
}
\value{
A logical
}
\description{
Determine whether a file is \dQuote{plain-text} or some sort of binary format
}
\examples{
library(datasets)
export(iris, yml_file <- tempfile(fileext = ".yml"))
is_file_text(yml_file) # TRUE
export(iris, sav_file <- tempfile(fileext = ".sav"))
is_file_text(sav_file) # FALSE
# cleanup
unlink(yml_file)
unlink(sav_file)
}
rio/man/convert.Rd 0000644 0001762 0000144 00000003412 14135737451 013577 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convert.R
\name{convert}
\alias{convert}
\title{Convert from one file format to another}
\usage{
convert(in_file, out_file, in_opts = list(), out_opts = list())
}
\arguments{
\item{in_file}{A character string naming an input file.}
\item{out_file}{A character string naming an output file.}
\item{in_opts}{A named list of options to be passed to \code{\link{import}}.}
\item{out_opts}{A named list of options to be passed to \code{\link{export}}.}
}
\value{
A character string containing the name of the output file (invisibly).
}
\description{
This function constructs a data frame from a data file using \code{\link{import}} and uses \code{\link{export}} to write the data to disk in the format indicated by the file extension.
}
\examples{
# create a file to convert
export(mtcars, dta_file <- tempfile(fileext = ".dta"))
# convert Stata to CSV and open converted file
convert(dta_file, csv_file <- tempfile(fileext = ".csv"))
head(import(csv_file))
# correct an erroneous file format
export(mtcars, csv_file2 <- tempfile(fileext = ".csv"), format = "tsv")
convert(csv_file2, csv_file, in_opts = list(format = "tsv"))
# convert serialized R data.frame to JSON
export(mtcars, rds_file <- tempfile(fileext = ".rds"))
convert(rds_file, json_file <- tempfile(fileext = ".json"))
# cleanup
unlink(csv_file)
unlink(csv_file2)
unlink(rds_file)
unlink(dta_file)
unlink(json_file)
\dontrun{\donttest{
# convert from the command line:
## Rscript -e "rio::convert('mtcars.dta', 'mtcars.csv')"
}}
}
\seealso{
\href{https://lbraglia.github.io/}{Luca Braglia} has created a Shiny app called \href{https://github.com/lbraglia/rioweb}{rioweb} that provides access to the file conversion features of rio through a web browser.
}
rio/man/extensions.Rd 0000644 0001762 0000144 00000003270 13600364062 014306 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/extensions.R
\name{.import}
\alias{.import}
\alias{extensions}
\alias{.export}
\alias{.import.default}
\alias{.export.default}
\title{rio Extensions}
\usage{
.import(file, ...)
\method{.import}{default}(file, ...)
.export(file, x, ...)
\method{.export}{default}(file, x, ...)
}
\arguments{
\item{file}{A character string naming a file.}
\item{\dots}{Additional arguments passed to methods.}
\item{x}{A data frame or matrix to be written into a file.}
}
\value{
For \code{.import}, an R data.frame. For \code{.export}, \code{file}, invisibly.
}
\description{
Writing Import/Export Extensions for rio
}
\details{
rio implements format-specific S3 methods for each type of file that can be imported from or exported to. This happens via internal S3 generics, \code{.import} and \code{.export}. It is possible to write new methods like with any S3 generic (e.g., \code{print}).
As an example, \code{.import.rio_csv} imports from a comma-separated values file. If you want to produce a method for a new filetype with extension \dQuote{myfile}, you simply have to create a function called \code{.import.rio_myfile} that implements a format-specific importing routine and returns a data.frame. rio will automatically recognize new S3 methods, so that you can then import your file using: \code{import("file.myfile")}.
As general guidance, if an import method creates many attributes, these attributes should be stored --- to the extent possible --- in variable-level attributes fields. These can be \dQuote{gathered} to the data.frame level by the user via \code{\link{gather_attrs}}.
}
\seealso{
\code{\link{import}}, \code{\link{export}}
}
rio/man/gather_attrs.Rd 0000644 0001762 0000144 00000002506 14063055402 014576 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gather_attrs.R
\name{gather_attrs}
\alias{gather_attrs}
\alias{spread_attrs}
\title{Gather attributes from data frame variables}
\usage{
gather_attrs(x)
spread_attrs(x)
}
\arguments{
\item{x}{A data frame.}
}
\value{
\code{x}, with variable-level attributes stored at the data frame level.
}
\description{
\code{gather_attrs} moves variable-level attributes to the data frame level and \code{spread_attrs} reverses that operation.
}
\details{
\code{\link{import}} attempts to standardize the return value from the various import functions to the extent possible, thus providing a uniform data structure regardless of what import package or function is used. It achieves this by storing any optional variable-related attributes at the variable level (i.e., an attribute for \code{mtcars$mpg} is stored in \code{attributes(mtcars$mpg)} rather than \code{attributes(mtcars)}). \code{gather_attrs} moves these to the data frame level (i.e., in \code{attributes(mtcars)}). \code{spread_attrs} moves attributes back to the variable level.
}
\examples{
e <- try(import("http://www.stata-press.com/data/r13/auto.dta"))
if (!inherits(e, "try-error")) {
str(e)
g <- gather_attrs(e)
str(attributes(e))
str(g)
}
}
\seealso{
\code{\link{import}}, \code{\link{characterize}}
}
rio/man/characterize.Rd 0000644 0001762 0000144 00000003417 14135737451 014570 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/characterize.R
\name{characterize}
\alias{characterize}
\alias{factorize}
\alias{characterize.default}
\alias{characterize.data.frame}
\alias{factorize.default}
\alias{factorize.data.frame}
\title{Character conversion of labelled data}
\usage{
characterize(x, ...)
factorize(x, ...)
\method{characterize}{default}(x, ...)
\method{characterize}{data.frame}(x, ...)
\method{factorize}{default}(x, coerce_character = FALSE, ...)
\method{factorize}{data.frame}(x, ...)
}
\arguments{
\item{x}{A vector or data frame.}
\item{\dots}{additional arguments passed to methods}
\item{coerce_character}{A logical indicating whether to additionally coerce character columns to factor (in \code{factorize}). Default \code{FALSE}.}
}
\description{
Convert labelled variables to character or factor
}
\details{
\code{characterize} converts a vector with a \code{labels} attribute of named levels into a character vector. \code{factorize} does the same but to factors. This can be useful at two stages of a data workflow: (1) importing labelled data from metadata-rich file formats (e.g., Stata or SPSS), and (2) exporting such data to plain text files (e.g., CSV) in a way that preserves information.
}
\examples{
# vector method
x <- structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3))
characterize(x)
factorize(x)
# data frame method
x <- data.frame(v1 = structure(1:4, labels = c("A" = 1, "B" = 2, "C" = 3)),
v2 = structure(c(1,0,0,1), labels = c("foo" = 0, "bar" = 1)))
str(factorize(x))
str(characterize(x))
# comparison of exported file contents
import(export(x, csv_file <- tempfile(fileext = ".csv")))
import(export(factorize(x), csv_file))
# cleanup
unlink(csv_file)
}
\seealso{
\code{\link{gather_attrs}}
}
rio/man/figures/ 0000755 0001762 0000144 00000000000 13334042064 013261 5 ustar ligges users rio/man/figures/logo.png 0000644 0001762 0000144 00000026770 13334042161 014741 0 ustar ligges users PNG
IHDR U) sBIT|d pHYs N N+ao tEXtSoftware www.inkscape.org< IDATxw|d+tP% AP"]zW(\.tQ&" X E!HBI;?Ͷ@3gߝ33g]h
l-lK
@@!RW ZVCtkX6_2G6~eiWSV`gN5z6
!e3;O8z]ћ|nGw?IR `/2pR[#5ºG P*Oϧ^8ؗyqn\Qvz<K@mZjk1)