haven/ 0000755 0001762 0000144 00000000000 14532122712 011352 5 ustar ligges users haven/NAMESPACE 0000644 0001762 0000144 00000007514 14361266255 012613 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method("names<-",haven_labelled)
S3method(as.character,haven_labelled)
S3method(as_factor,data.frame)
S3method(as_factor,haven_labelled)
S3method(as_factor,labelled)
S3method(format,haven_labelled)
S3method(format,pillar_shaft_haven_labelled_chr)
S3method(format,pillar_shaft_haven_labelled_num)
S3method(is.na,haven_labelled_spss)
S3method(levels,haven_labelled)
S3method(median,haven_labelled)
S3method(obj_print_footer,haven_labelled)
S3method(obj_print_footer,haven_labelled_spss)
S3method(obj_print_header,haven_labelled)
S3method(quantile,haven_labelled)
S3method(summary,haven_labelled)
S3method(vec_arith,haven_labelled)
S3method(vec_arith.haven_labelled,default)
S3method(vec_arith.haven_labelled,haven_labelled)
S3method(vec_arith.haven_labelled,numeric)
S3method(vec_arith.numeric,haven_labelled)
S3method(vec_cast,character.haven_labelled)
S3method(vec_cast,character.haven_labelled_spss)
S3method(vec_cast,double.haven_labelled)
S3method(vec_cast,double.haven_labelled_spss)
S3method(vec_cast,haven_labelled.character)
S3method(vec_cast,haven_labelled.double)
S3method(vec_cast,haven_labelled.haven_labelled)
S3method(vec_cast,haven_labelled.haven_labelled_spss)
S3method(vec_cast,haven_labelled.integer)
S3method(vec_cast,haven_labelled_spss.character)
S3method(vec_cast,haven_labelled_spss.double)
S3method(vec_cast,haven_labelled_spss.haven_labelled)
S3method(vec_cast,haven_labelled_spss.haven_labelled_spss)
S3method(vec_cast,haven_labelled_spss.integer)
S3method(vec_cast,integer.haven_labelled)
S3method(vec_cast,integer.haven_labelled_spss)
S3method(vec_math,haven_labelled)
S3method(vec_ptype2,character.haven_labelled)
S3method(vec_ptype2,character.haven_labelled_spss)
S3method(vec_ptype2,double.haven_labelled)
S3method(vec_ptype2,double.haven_labelled_spss)
S3method(vec_ptype2,haven_labelled.character)
S3method(vec_ptype2,haven_labelled.double)
S3method(vec_ptype2,haven_labelled.haven_labelled)
S3method(vec_ptype2,haven_labelled.haven_labelled_spss)
S3method(vec_ptype2,haven_labelled.integer)
S3method(vec_ptype2,haven_labelled_spss.character)
S3method(vec_ptype2,haven_labelled_spss.double)
S3method(vec_ptype2,haven_labelled_spss.haven_labelled)
S3method(vec_ptype2,haven_labelled_spss.haven_labelled_spss)
S3method(vec_ptype2,haven_labelled_spss.integer)
S3method(vec_ptype2,integer.haven_labelled)
S3method(vec_ptype2,integer.haven_labelled_spss)
S3method(vec_ptype_abbr,haven_labelled)
S3method(vec_ptype_abbr,haven_labelled_spss)
S3method(vec_ptype_full,haven_labelled)
S3method(vec_ptype_full,haven_labelled_spss)
S3method(zap_formats,data.frame)
S3method(zap_formats,default)
S3method(zap_label,data.frame)
S3method(zap_label,default)
S3method(zap_labels,data.frame)
S3method(zap_labels,default)
S3method(zap_labels,haven_labelled)
S3method(zap_labels,haven_labelled_spss)
S3method(zap_missing,data.frame)
S3method(zap_missing,default)
S3method(zap_missing,haven_labelled)
S3method(zap_missing,haven_labelled_spss)
S3method(zap_widths,data.frame)
S3method(zap_widths,default)
export(as_factor)
export(format_tagged_na)
export(is.labelled)
export(is_tagged_na)
export(labelled)
export(labelled_spss)
export(na_tag)
export(print_labels)
export(print_tagged_na)
export(read_dta)
export(read_por)
export(read_sas)
export(read_sav)
export(read_spss)
export(read_stata)
export(read_xpt)
export(tagged_na)
export(vec_arith.haven_labelled)
export(write_dta)
export(write_sas)
export(write_sav)
export(write_xpt)
export(zap_empty)
export(zap_formats)
export(zap_label)
export(zap_labels)
export(zap_missing)
export(zap_widths)
import(rlang)
import(vctrs)
importFrom(cli,cli_abort)
importFrom(cli,cli_warn)
importFrom(forcats,as_factor)
importFrom(hms,hms)
importFrom(lifecycle,deprecated)
importFrom(methods,setOldClass)
importFrom(stats,median)
importFrom(stats,quantile)
importFrom(tibble,tibble)
useDynLib(haven, .registration = TRUE)
haven/LICENSE 0000644 0001762 0000144 00000000053 14520237654 012366 0 ustar ligges users YEAR: 2023
COPYRIGHT HOLDER: haven authors
haven/README.md 0000644 0001762 0000144 00000005533 14520204132 012631 0 ustar ligges users
# haven
[CRAN status](https://cran.r-project.org/package=haven)
[R-CMD-check](https://github.com/tidyverse/haven/actions/workflows/R-CMD-check.yaml)
[Codecov test coverage](https://app.codecov.io/gh/tidyverse/haven?branch=main)
## Overview
Haven enables R to read and write various data formats used by other
statistical packages by wrapping the fantastic
[ReadStat](https://github.com/WizardMac/ReadStat) C library written by
[Evan Miller](https://www.evanmiller.org). Haven is part of the
[tidyverse](https://www.tidyverse.org/). Currently it supports:
- **SAS**: `read_sas()` reads `.sas7bdat` + `.sas7bcat` files and
`read_xpt()` reads SAS transport files (versions 5 and 8).
`write_xpt()` writes SAS transport files (versions 5 and 8).
- **SPSS**: `read_sav()` reads `.sav` files and `read_por()` reads the
older `.por` files. `write_sav()` writes `.sav` files.
- **Stata**: `read_dta()` reads `.dta` files (up to version 15).
`write_dta()` writes `.dta` files (versions 8-15).
The output objects:
- Are [tibbles](https://github.com/tidyverse/tibble), which have a
better print method for very long and very wide files.
- Translate value labels into a new `labelled()` class, which preserves
the original semantics and can easily be coerced to factors with
`as_factor()`. Special missing values are preserved. See
`vignette("semantics")` for more details.
- Dates and times are converted to R date/time classes. Character
vectors are not converted to factors.
## Installation
``` r
# The easiest way to get haven is to install the whole tidyverse:
install.packages("tidyverse")
# Alternatively, install just haven:
install.packages("haven")
```
## Usage
``` r
library(haven)
# SAS
read_sas("mtcars.sas7bdat")
write_xpt(mtcars, "mtcars.xpt")
# SPSS
read_sav("mtcars.sav")
write_sav(mtcars, "mtcars.sav")
# Stata
read_dta("mtcars.dta")
write_dta(mtcars, "mtcars.dta")
```
## Related work
- [foreign](https://cran.r-project.org/package=foreign) reads from SAS
XPORT, SPSS, and Stata (up to version 12) files.
- [readstata13](https://cran.r-project.org/package=readstata13) reads
from and writes to all Stata file format versions.
- [sas7bdat](https://cran.r-project.org/package=sas7bdat) reads from
SAS7BDAT files.
## Code of Conduct
Please note that the haven project is released with a [Contributor Code
of Conduct](https://haven.tidyverse.org/CODE_OF_CONDUCT.html). By
contributing to this project, you agree to abide by its terms.
haven/man/ 0000755 0001762 0000144 00000000000 14520237654 012136 5 ustar ligges users haven/man/vec_arith.haven_labelled.Rd 0000644 0001762 0000144 00000000457 14033646021 017312 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/labelled.R
\name{vec_arith.haven_labelled}
\alias{vec_arith.haven_labelled}
\title{Internal vctrs methods}
\usage{
\method{vec_arith}{haven_labelled}(op, x, y, ...)
}
\description{
Internal vctrs methods
}
\keyword{internal}
haven/man/tagged_na.Rd 0000644 0001762 0000144 00000003113 14033646021 014323 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagged_na.R
\name{tagged_na}
\alias{tagged_na}
\alias{na_tag}
\alias{is_tagged_na}
\alias{format_tagged_na}
\alias{print_tagged_na}
\title{"Tagged" missing values}
\usage{
tagged_na(...)
na_tag(x)
is_tagged_na(x, tag = NULL)
format_tagged_na(x, digits = getOption("digits"))
print_tagged_na(x, digits = getOption("digits"))
}
\arguments{
\item{...}{Vectors containing single character. The letter will be used to
"tag" the missing value.}
\item{x}{A numeric vector}
\item{tag}{If not \code{NULL}, will only return true if the tag has this value.}
\item{digits}{Number of digits to use in string representation}
}
\description{
"Tagged" missing values work exactly like regular R missing values except
that they store one additional byte of information: a tag, which is usually
a letter ("a" to "z"). When loading a SAS or Stata file, the tagged
missing values always use lower case values.
}
\details{
\code{format_tagged_na()} and \code{print_tagged_na()} format tagged
NA's as NA(a), NA(b), etc.
}
\examples{
x <- c(1:5, tagged_na("a"), tagged_na("z"), NA)
# Tagged NA's work identically to regular NAs
x
is.na(x)
# To see that they're special, you need to use na_tag(),
# is_tagged_na(), or print_tagged_na():
is_tagged_na(x)
na_tag(x)
print_tagged_na(x)
# You can test for specific tagged NAs with the second argument
is_tagged_na(x, "a")
# Because the support for tagged NAs is somewhat tacked on to R,
# the left-most NA will tend to be preserved in arithmetic operations.
na_tag(tagged_na("a") + tagged_na("z"))
}
haven/man/read_spss.Rd 0000644 0001762 0000144 00000012306 14376672143 014417 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/haven-spss.R
\name{read_spss}
\alias{read_spss}
\alias{read_sav}
\alias{read_por}
\alias{write_sav}
\title{Read and write SPSS files}
\usage{
read_sav(
file,
encoding = NULL,
user_na = FALSE,
col_select = NULL,
skip = 0,
n_max = Inf,
.name_repair = "unique"
)
read_por(
file,
user_na = FALSE,
col_select = NULL,
skip = 0,
n_max = Inf,
.name_repair = "unique"
)
write_sav(data, path, compress = c("byte", "none", "zsav"), adjust_tz = TRUE)
read_spss(
file,
user_na = FALSE,
col_select = NULL,
skip = 0,
n_max = Inf,
.name_repair = "unique"
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[readr:clipboard]{clipboard()}} will read from the system clipboard.}
\item{encoding}{The character encoding used for the file. The default,
\code{NULL}, uses the encoding specified in the file, but sometimes this
value is incorrect and it is useful to be able to override it.}
\item{user_na}{If \code{TRUE} variables with user defined missing will
be read into \code{\link[=labelled_spss]{labelled_spss()}} objects. If \code{FALSE}, the
default, user-defined missings will be converted to \code{NA}.}
\item{col_select}{One or more selection expressions, like in
\code{\link[dplyr:select]{dplyr::select()}}. Use \code{c()} or \code{list()} to use more than one expression.
See \code{?dplyr::select} for details on available selection options. Only the
specified columns will be read from \code{file}.}
\item{skip}{Number of lines to skip before reading data.}
\item{n_max}{Maximum number of lines to read.}
\item{.name_repair}{Treatment of problematic column names:
\itemize{
\item \code{"minimal"}: No name repair or checks, beyond basic existence,
\item \code{"unique"}: (default value), make sure names are unique and not empty,
\item \code{"check_unique"}: no name repair, but check they are
\code{unique},
\item \code{"universal"}: Make the names \code{unique} and syntactic
\item a function: apply custom name repair (e.g., \code{.name_repair = make.names}
for names in the style of base R).
\item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}
}
This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
See there for more details on these terms and the strategies used
to enforce them.}
\item{data}{Data frame to write.}
\item{path}{Path to a file where the data will be written.}
\item{compress}{Compression type to use:
\itemize{
\item "byte": the default, uses byte compression.
\item "none": no compression. This is useful for software that has issues with
byte compressed \code{.sav} files (e.g. SAS).
\item "zsav": uses zlib compression and produces a \code{.zsav} file. zlib
compression is supported by SPSS version 21.0 and above.
}
\code{TRUE} and \code{FALSE} can be used for backwards compatibility, and correspond
to the "zsav" and "none" options respectively.}
\item{adjust_tz}{Stata, SPSS and SAS do not have a concept of time zone,
and all \link{date-time} variables are treated as UTC. \code{adjust_tz} controls
how the timezone of date-time values is treated when writing.
\itemize{
\item If \code{TRUE} (the default) the timezone of date-time values is ignored, and
they will display the same in R and Stata/SPSS/SAS, e.g.
\code{"2010-01-01 09:00:00 NZDT"} will be written as \code{"2010-01-01 09:00:00"}.
Note that this changes the underlying numeric data, so use caution if
preserving between-time-point differences is critical.
\item If \code{FALSE}, date-time values are written as the corresponding UTC value,
e.g. \code{"2010-01-01 09:00:00 NZDT"} will be written as
\code{"2009-12-31 20:00:00"}.
}}
}
\value{
A tibble, data frame variant with nice defaults.
Variable labels are stored in the "label" attribute of each variable.
It is not printed on the console, but the RStudio viewer will show it.
\code{write_sav()} returns the input \code{data} invisibly.
}
\description{
\code{read_sav()} reads both \code{.sav} and \code{.zsav} files; \code{write_sav()} creates
\code{.zsav} files when \code{compress = TRUE}. \code{read_por()} reads \code{.por} files.
\code{read_spss()} uses either \code{read_por()} or \code{read_sav()} based on the
file extension.
}
\details{
Currently haven can read and write logical, integer, numeric, character
and factors. See \code{\link[=labelled_spss]{labelled_spss()}} for how labelled variables in
SPSS are handled in R.
}
\examples{
path <- system.file("examples", "iris.sav", package = "haven")
read_sav(path)
tmp <- tempfile(fileext = ".sav")
write_sav(mtcars, tmp)
read_sav(tmp)
}
haven/man/labelled.Rd 0000644 0001762 0000144 00000003545 14225304450 014167 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/labelled.R
\name{labelled}
\alias{labelled}
\alias{is.labelled}
\title{Create a labelled vector.}
\usage{
labelled(x = double(), labels = NULL, label = NULL)
is.labelled(x)
}
\arguments{
\item{x}{A vector to label. Must be either numeric (integer or double) or
character.}
\item{labels}{A named vector or \code{NULL}. The vector should be the same type
as \code{x}. Unlike factors, labels don't need to be exhaustive: only a fraction
of the values might be labelled.}
\item{label}{A short, human-readable description of the vector.}
}
\description{
A labelled vector is a common data structure in other statistical
environments, allowing you to assign text labels to specific values.
This class makes it possible to import such labelled vectors into R
without loss of fidelity. This class provides few methods, as I
expect you'll coerce to a standard R class (e.g. a \code{\link[=factor]{factor()}})
soon after importing.
}
\examples{
s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F"))
s2 <- labelled(c(1, 1, 2), c(Male = 1, Female = 2))
s3 <- labelled(
c(1, 1, 2),
c(Male = 1, Female = 2),
label = "Assigned sex at birth"
)
# Unfortunately it's not possible to make as.factor work for labelled objects
# so instead use as_factor. This works for all types of labelled vectors.
as_factor(s1)
as_factor(s1, levels = "values")
as_factor(s2)
# Other statistical software supports multiple types of missing values
s3 <- labelled(
c("M", "M", "F", "X", "N/A"),
c(Male = "M", Female = "F", Refused = "X", "Not applicable" = "N/A")
)
s3
as_factor(s3)
# Often when you have a partially labelled numeric vector, labelled values
# are special types of missing. Use zap_labels to replace labels with missing
# values
x <- labelled(c(1, 2, 1, 2, 10, 9), c(Unknown = 9, Refused = 10))
zap_labels(x)
}
haven/man/zap_widths.Rd 0000644 0001762 0000144 00000001204 14033646021 014565 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/zap_widths.R
\name{zap_widths}
\alias{zap_widths}
\title{Remove display width attributes}
\usage{
zap_widths(x)
}
\arguments{
\item{x}{A vector or data frame.}
}
\description{
To provide some mild support for round-tripping variables between SPSS
and R, haven stores display widths in an attribute: \code{display_width}. If this
causes problems for your code, you can get rid of them with \code{zap_widths}.
}
\seealso{
Other zappers:
\code{\link{zap_empty}()},
\code{\link{zap_formats}()},
\code{\link{zap_labels}()},
\code{\link{zap_label}()}
}
\concept{zappers}
haven/man/print_labels.Rd 0000644 0001762 0000144 00000001220 14033646021 015065 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/labelled.R
\name{print_labels}
\alias{print_labels}
\title{Print the labels of a labelled vector}
\usage{
print_labels(x, name = NULL)
}
\arguments{
\item{x}{A labelled vector}
\item{name}{The name of the vector (optional)}
}
\description{
This is a convenience function, useful to explore the variables of
a newly imported dataset.
}
\examples{
s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F"))
s2 <- labelled(c(1, 1, 2), c(Male = 1, Female = 2))
labelled_df <- tibble::tibble(s1, s2)
for (var in names(labelled_df)) {
print_labels(labelled_df[[var]], var)
}
}
haven/man/labelled_spss.Rd 0000644 0001762 0000144 00000002622 14225304450 015232 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/labelled_spss.R
\name{labelled_spss}
\alias{labelled_spss}
\title{Labelled vectors for SPSS}
\usage{
labelled_spss(
x = double(),
labels = NULL,
na_values = NULL,
na_range = NULL,
label = NULL
)
}
\arguments{
\item{x}{A vector to label. Must be either numeric (integer or double) or
character.}
\item{labels}{A named vector or \code{NULL}. The vector should be the same type
as \code{x}. Unlike factors, labels don't need to be exhaustive: only a fraction
of the values might be labelled.}
\item{na_values}{A vector of values that should also be considered as missing.}
\item{na_range}{A numeric vector of length two giving the (inclusive) extents
of the range. Use \code{-Inf} and \code{Inf} if you want the range to be
open ended.}
\item{label}{A short, human-readable description of the vector.}
}
\description{
This class is only used when \code{user_na = TRUE} in
\code{\link[=read_sav]{read_sav()}}. It is similar to the \code{\link[=labelled]{labelled()}} class
but it also models SPSS's user-defined missings, which can be up to
three distinct values, or for numeric vectors a range.
}
\examples{
x1 <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10))
is.na(x1)
x2 <- labelled_spss(
1:10,
c(Good = 1, Bad = 8),
na_range = c(9, Inf),
label = "Quality rating"
)
is.na(x2)
# Print data and metadata
x2
}
haven/man/zap_formats.Rd 0000644 0001762 0000144 00000001262 14033646021 014742 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/zap_formats.R
\name{zap_formats}
\alias{zap_formats}
\title{Remove format attributes}
\usage{
zap_formats(x)
}
\arguments{
\item{x}{A vector or data frame.}
}
\description{
To provide some mild support for round-tripping variables between Stata/SPSS
and R, haven stores variable formats in an attribute: \code{format.stata},
\code{format.spss}, or \code{format.sas}. If this causes problems for your
code, you can get rid of them with \code{zap_formats}.
}
\seealso{
Other zappers:
\code{\link{zap_empty}()},
\code{\link{zap_labels}()},
\code{\link{zap_label}()},
\code{\link{zap_widths}()}
}
\concept{zappers}
haven/man/zap_empty.Rd 0000644 0001762 0000144 00000001103 14101766333 014424 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/zap_empty.R
\name{zap_empty}
\alias{zap_empty}
\title{Convert empty strings into missing values}
\usage{
zap_empty(x)
}
\arguments{
\item{x}{A character vector}
}
\value{
A character vector with empty strings replaced by missing values.
}
\description{
Convert empty strings into missing values
}
\examples{
x <- c("a", "", "c")
zap_empty(x)
}
\seealso{
Other zappers:
\code{\link{zap_formats}()},
\code{\link{zap_labels}()},
\code{\link{zap_label}()},
\code{\link{zap_widths}()}
}
\concept{zappers}
haven/man/figures/ 0000755 0001762 0000144 00000000000 14520233117 013570 5 ustar ligges users haven/man/figures/lifecycle-defunct.svg 0000644 0001762 0000144 00000001704 14225304450 017701 0 ustar ligges users