readr/ 0000755 0001762 0000144 00000000000 14547623042 011356 5 ustar ligges users readr/NAMESPACE 0000644 0001762 0000144 00000006702 14547552156 012611 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method("[",spec_tbl_df)
S3method(as.character,col_spec)
S3method(as.col_spec,"NULL")
S3method(as.col_spec,character)
S3method(as.col_spec,col_spec)
S3method(as.col_spec,data.frame)
S3method(as.col_spec,default)
S3method(as.col_spec,list)
S3method(as.data.frame,spec_tbl_df)
S3method(as_tibble,spec_tbl_df)
S3method(format,col_spec)
S3method(output_column,POSIXt)
S3method(output_column,default)
S3method(output_column,double)
S3method(print,col_spec)
S3method(print,collector)
S3method(print,date_names)
S3method(print,locale)
S3method(str,col_spec)
S3method(type_to_col,Date)
S3method(type_to_col,POSIXct)
S3method(type_to_col,default)
S3method(type_to_col,double)
S3method(type_to_col,factor)
S3method(type_to_col,hms)
S3method(type_to_col,integer)
S3method(type_to_col,logical)
export(AccumulateCallback)
export(ChunkCallback)
export(DataFrameCallback)
export(ListCallback)
export(SideEffectChunkCallback)
export(as.col_spec)
export(clipboard)
export(col_character)
export(col_date)
export(col_datetime)
export(col_double)
export(col_factor)
export(col_guess)
export(col_integer)
export(col_logical)
export(col_number)
export(col_skip)
export(col_time)
export(cols)
export(cols_condense)
export(cols_only)
export(count_fields)
export(datasource)
export(date_names)
export(date_names_lang)
export(date_names_langs)
export(default_locale)
export(edition_get)
export(format_csv)
export(format_csv2)
export(format_delim)
export(format_tsv)
export(fwf_cols)
export(fwf_empty)
export(fwf_positions)
export(fwf_widths)
export(guess_encoding)
export(guess_parser)
export(local_edition)
export(locale)
export(melt_csv)
export(melt_csv2)
export(melt_csv2_chunked)
export(melt_csv_chunked)
export(melt_delim)
export(melt_delim_chunked)
export(melt_fwf)
export(melt_table)
export(melt_table2)
export(melt_tsv)
export(melt_tsv_chunked)
export(output_column)
export(parse_character)
export(parse_date)
export(parse_datetime)
export(parse_double)
export(parse_factor)
export(parse_guess)
export(parse_integer)
export(parse_logical)
export(parse_number)
export(parse_time)
export(parse_vector)
export(problems)
export(read_builtin)
export(read_csv)
export(read_csv2)
export(read_csv2_chunked)
export(read_csv_chunked)
export(read_delim)
export(read_delim_chunked)
export(read_file)
export(read_file_raw)
export(read_fwf)
export(read_lines)
export(read_lines_chunked)
export(read_lines_raw)
export(read_lines_raw_chunked)
export(read_log)
export(read_rds)
export(read_table)
export(read_table2)
export(read_tsv)
export(read_tsv_chunked)
export(readr_example)
export(readr_threads)
export(should_read_lazy)
export(should_show_types)
export(show_progress)
export(spec)
export(spec_csv)
export(spec_csv2)
export(spec_delim)
export(spec_table)
export(spec_tsv)
export(stop_for_problems)
export(tokenize)
export(tokenizer_csv)
export(tokenizer_delim)
export(tokenizer_fwf)
export(tokenizer_line)
export(tokenizer_log)
export(tokenizer_tsv)
export(tokenizer_ws)
export(type_convert)
export(with_edition)
export(write_csv)
export(write_csv2)
export(write_delim)
export(write_excel_csv)
export(write_excel_csv2)
export(write_file)
export(write_lines)
export(write_rds)
export(write_tsv)
importFrom(R6,R6Class)
importFrom(hms,hms)
importFrom(lifecycle,deprecate_soft)
importFrom(lifecycle,deprecate_warn)
importFrom(lifecycle,deprecated)
importFrom(lifecycle,is_present)
importFrom(methods,setOldClass)
importFrom(tibble,as_tibble)
importFrom(tibble,tibble)
useDynLib(readr, .registration = TRUE)
readr/LICENSE 0000644 0001762 0000144 00000000053 14371264576 012371 0 ustar ligges users YEAR: 2023
COPYRIGHT HOLDER: readr authors
readr/README.md 0000644 0001762 0000144 00000022641 14547547012 012644 0 ustar ligges users
# readr
[![CRAN status](https://www.r-pkg.org/badges/version/readr)](https://CRAN.R-project.org/package=readr)
[![R-CMD-check](https://github.com/tidyverse/readr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/readr/actions/workflows/R-CMD-check.yaml)
[![Codecov test coverage](https://codecov.io/gh/tidyverse/readr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/readr?branch=main)
## Overview
The goal of readr is to provide a fast and friendly way to read
rectangular data from delimited files, such as comma-separated values
(CSV) and tab-separated values (TSV). It is designed to parse many types
of data found in the wild, while providing an informative problem report
when parsing leads to unexpected results. If you are new to readr, the
best place to start is the [data import
chapter](https://r4ds.hadley.nz/data-import) in R for Data Science.
## Installation
``` r
# The easiest way to get readr is to install the whole tidyverse:
install.packages("tidyverse")
# Alternatively, install just readr:
install.packages("readr")
```
``` r
# Or you can install the development version from GitHub:
# install.packages("pak")
pak::pak("tidyverse/readr")
```
## Cheatsheet
## Usage
readr is part of the core tidyverse, so you can load it with:
``` r
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.4.9000
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
#> ✔ purrr 1.0.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
```
Of course, you can also load readr as an individual package:
``` r
library(readr)
```
To read a rectangular dataset with readr, you combine two pieces: a
function that parses the lines of the file into individual fields and a
column specification.
readr supports the following file formats with these `read_*()`
functions:
- `read_csv()`: comma-separated values (CSV)
- `read_tsv()`: tab-separated values (TSV)
- `read_csv2()`: semicolon-separated values with `,` as the decimal mark
- `read_delim()`: delimited files (CSV and TSV are important special
cases)
- `read_fwf()`: fixed-width files
- `read_table()`: whitespace-separated files
- `read_log()`: web log files
A column specification describes how each column should be converted
from a character vector to a specific data type (e.g. character,
numeric, datetime, etc.). In the absence of a column specification,
readr will guess column types from the data. `vignette("column-types")`
gives more detail on how readr guesses the column types. Column type
guessing is very handy, especially during data exploration, but it’s
important to remember these are *just guesses*. As any data analysis
project matures past the exploratory phase, the best strategy is to
provide explicit column types.
The following example loads a sample file bundled with readr and guesses
the column types:
``` r
(chickens <- read_csv(readr_example("chickens.csv")))
#> Rows: 5 Columns: 4
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (3): chicken, sex, motto
#> dbl (1): eggs_laid
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> # A tibble: 5 × 4
#> chicken sex eggs_laid motto
#>   <chr>                   <chr>       <dbl> <chr>
#> 1 Foghorn Leghorn rooster 0 That's a joke, ah say, that's a jok…
#> 2 Chicken Little hen 3 The sky is falling!
#> 3 Ginger hen 12 Listen. We'll either die free chick…
#> 4 Camilla the Chicken hen 7 Bawk, buck, ba-gawk.
#> 5 Ernie The Giant Chicken rooster 0 Put Captain Solo in the cargo hold.
```
Note that readr prints the column types – the *guessed* column types, in
this case. This is useful because it allows you to check that the
columns have been read in as you expect. If they haven’t, that means you
need to provide the column specification. This sounds like a lot of
trouble, but luckily readr affords a nice workflow for this. Use
`spec()` to retrieve the (guessed) column specification from your
initial effort.
``` r
spec(chickens)
#> cols(
#> chicken = col_character(),
#> sex = col_character(),
#> eggs_laid = col_double(),
#> motto = col_character()
#> )
```
Now you can copy, paste, and tweak this, to create a more explicit readr
call that expresses the desired column types. Here we express that `sex`
should be a factor with levels `rooster` and `hen`, in that order, and
that `eggs_laid` should be integer.
``` r
chickens <- read_csv(
readr_example("chickens.csv"),
col_types = cols(
chicken = col_character(),
sex = col_factor(levels = c("rooster", "hen")),
eggs_laid = col_integer(),
motto = col_character()
)
)
chickens
#> # A tibble: 5 × 4
#> chicken sex eggs_laid motto
#>   <chr>                   <fct>       <int> <chr>
#> 1 Foghorn Leghorn rooster 0 That's a joke, ah say, that's a jok…
#> 2 Chicken Little hen 3 The sky is falling!
#> 3 Ginger hen 12 Listen. We'll either die free chick…
#> 4 Camilla the Chicken hen 7 Bawk, buck, ba-gawk.
#> 5 Ernie The Giant Chicken rooster 0 Put Captain Solo in the cargo hold.
```
`vignette("readr")` gives an expanded introduction to readr.
## Editions
readr got a new parsing engine in version 2.0.0 (released July 2021). In
this so-called second edition, readr calls `vroom::vroom()`, by default.
The parsing engine in readr versions prior to 2.0.0 is now called the
first edition. If you’re using readr \>= 2.0.0, you can still access
first edition parsing via the functions `with_edition(1, ...)` and
`local_edition(1)`. And, obviously, if you’re using readr \< 2.0.0, you
will get first edition parsing, by definition, because that’s all there
is.
We will continue to support the first edition for a number of releases,
but the overall goal is to make the second edition uniformly better than
the first. Therefore the plan is to eventually deprecate and then remove
the first edition code. New code and actively-maintained code should use
the second edition. The workarounds `with_edition(1, ...)` and
`local_edition(1)` are offered as a pragmatic way to patch up legacy
code or as a temporary solution for infelicities identified as the
second edition matures.
## Alternatives
There are two main alternatives to readr: base R and data.table’s
`fread()`. The most important differences are discussed below.
### Base R
Compared to the corresponding base functions, readr functions:
- Use a consistent naming scheme for the parameters (e.g. `col_names`
and `col_types` not `header` and `colClasses`).
- Are generally much faster (up to 10x-100x) depending on the dataset.
- Leave strings as is by default, and automatically parse common
date/time formats.
- Have a helpful progress bar if loading is going to take a while.
- All functions work exactly the same way regardless of the current
locale. To override the US-centric defaults, use `locale()`.
### data.table and `fread()`
[data.table](https://github.com/Rdatatable/data.table) has a function
similar to `read_csv()` called `fread()`. Compared to `fread()`, readr
functions:
- Are sometimes slower, particularly on numeric heavy data.
- Can automatically guess some parameters, but basically encourage
explicit specification of, e.g., the delimiter, skipped rows, and the
header row.
- Follow tidyverse-wide conventions, such as returning a tibble, a
standard approach for column name repair, and a common mini-language
for column selection.
## Acknowledgements
Thanks to:
- [Joe Cheng](https://github.com/jcheng5) for showing me the beauty of
deterministic finite automata for parsing, and for teaching me why I
should write a tokenizer.
- [JJ Allaire](https://github.com/jjallaire) for helping me come up with
a design that makes very few copies, and is easy to extend.
- [Dirk Eddelbuettel](http://dirk.eddelbuettel.com) for coming up with
the name!
readr/man/ 0000755 0001762 0000144 00000000000 14510343737 012131 5 ustar ligges users readr/man/cols.Rd 0000644 0001762 0000144 00000004712 14174704674 013373 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/col_types.R
\name{cols}
\alias{cols}
\alias{cols_only}
\title{Create column specification}
\usage{
cols(..., .default = col_guess())
cols_only(...)
}
\arguments{
\item{...}{Either column objects created by \verb{col_*()}, or their abbreviated
character names (as described in the \code{col_types} argument of
\code{\link[=read_delim]{read_delim()}}). If you're only overriding a few columns, it's
best to refer to columns by name. If not named, the column types must match
the column names exactly.}
\item{.default}{Any named columns not explicitly overridden in \code{...}
will be read with this column type.}
}
\description{
\code{cols()} includes all columns in the input data, guessing the column types
as the default. \code{cols_only()} includes only the columns you explicitly
specify, skipping the rest. In general you can substitute \code{list()} for
\code{cols()} without changing the behavior.
}
\details{
The available specifications are: (with string abbreviations in brackets)
\itemize{
\item \code{col_logical()} [l], containing only \code{T}, \code{F}, \code{TRUE} or \code{FALSE}.
\item \code{col_integer()} [i], integers.
\item \code{col_double()} [d], doubles.
\item \code{col_character()} [c], everything else.
\item \code{col_factor(levels, ordered)} [f], a fixed set of values.
\item \code{col_date(format = "")} [D]: with the locale's \code{date_format}.
\item \code{col_time(format = "")} [t]: with the locale's \code{time_format}.
\item \code{col_datetime(format = "")} [T]: ISO8601 date times
\item \code{col_number()} [n], numbers containing the \code{grouping_mark}
\item \code{col_skip()} [_, -], don't import this column.
\item \code{col_guess()} [?], parse using the "best" type based on the input.
}
}
\examples{
cols(a = col_integer())
cols_only(a = col_integer())
# You can also use the standard abbreviations
cols(a = "i")
cols(a = "i", b = "d", c = "_")
# You can also use multiple sets of column definitions by combining
# them like so:
t1 <- cols(
column_one = col_integer(),
column_two = col_number()
)
t2 <- cols(
column_three = col_character()
)
t3 <- t1
t3$cols <- c(t1$cols, t2$cols)
t3
}
\seealso{
Other parsers:
\code{\link{col_skip}()},
\code{\link{cols_condense}()},
\code{\link{parse_datetime}()},
\code{\link{parse_factor}()},
\code{\link{parse_guess}()},
\code{\link{parse_logical}()},
\code{\link{parse_number}()},
\code{\link{parse_vector}()}
}
\concept{parsers}
readr/man/melt_delim_chunked.Rd 0000644 0001762 0000144 00000012065 14304131171 016224 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/melt_delim_chunked.R
\name{melt_delim_chunked}
\alias{melt_delim_chunked}
\alias{melt_csv_chunked}
\alias{melt_csv2_chunked}
\alias{melt_tsv_chunked}
\title{Melt a delimited file by chunks}
\usage{
melt_delim_chunked(
file,
callback,
chunk_size = 10000,
delim,
quote = "\\"",
escape_backslash = FALSE,
escape_double = TRUE,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
comment = "",
trim_ws = FALSE,
skip = 0,
progress = show_progress(),
skip_empty_rows = FALSE
)
melt_csv_chunked(
file,
callback,
chunk_size = 10000,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
progress = show_progress(),
skip_empty_rows = FALSE
)
melt_csv2_chunked(
file,
callback,
chunk_size = 10000,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
progress = show_progress(),
skip_empty_rows = FALSE
)
melt_tsv_chunked(
file,
callback,
chunk_size = 10000,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
progress = show_progress(),
skip_empty_rows = FALSE
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{callback}{A callback function to call on each chunk}
\item{chunk_size}{The number of rows to include in each chunk}
\item{delim}{Single character used to separate fields within a record.}
\item{quote}{Single character used to quote strings.}
\item{escape_backslash}{Does the file use backslashes to escape special
characters? This is more general than \code{escape_double} as backslashes
can be used to escape the delimiter character, the quote character, or
to add special characters like \verb{\\\\n}.}
\item{escape_double}{Does the file escape quotes by doubling them?
i.e. If this option is \code{TRUE}, the value \verb{""""} represents
a single quote, \verb{\\"}.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{quoted_na}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Should missing values
inside quotes be treated as missing values (the default) or strings. This
parameter is soft deprecated as of readr 2.0.0.}
\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
\item{skip}{Number of lines to skip before reading data. If \code{comment} is
supplied any commented lines are ignored \emph{after} skipping.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}
}
\description{
For certain non-rectangular data formats, it can be useful to parse the data
into a melted format where each row represents a single token.
}
\details{
\code{melt_delim_chunked()} and the specialisations \code{melt_csv_chunked()},
\code{melt_csv2_chunked()} and \code{melt_tsv_chunked()} read files by a chunk of rows
at a time, executing a given function on one chunk before reading the next.
}
\examples{
# Keep only the melted tokens that were parsed as integers
f <- function(x, pos) subset(x, data_type == "integer")
melt_csv_chunked(readr_example("mtcars.csv"), DataFrameCallback$new(f), chunk_size = 5)
}
\seealso{
Other chunked:
\code{\link{callback}},
\code{\link{read_delim_chunked}()},
\code{\link{read_lines_chunked}()}
}
\concept{chunked}
\keyword{internal}
readr/man/parse_atomic.Rd 0000644 0001762 0000144 00000004264 14174704674 015103 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collectors.R
\name{parse_atomic}
\alias{parse_logical}
\alias{parse_integer}
\alias{parse_double}
\alias{parse_character}
\alias{col_logical}
\alias{col_integer}
\alias{col_double}
\alias{col_character}
\title{Parse logicals, integers, and reals}
\usage{
parse_logical(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE)
parse_integer(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE)
parse_double(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE)
parse_character(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE)
col_logical()
col_integer()
col_double()
col_character()
}
\arguments{
\item{x}{Character vector of values to parse.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
}
\description{
Use \verb{parse_*()} if you have a character vector you want to parse. Use
\verb{col_*()} in conjunction with a \verb{read_*()} function to parse the
values as they're read in.
}
\examples{
parse_integer(c("1", "2", "3"))
parse_double(c("1", "2", "3.123"))
parse_number("$1,123,456.00")
# Use locale to override default decimal and grouping marks
es_MX <- locale("es", decimal_mark = ",")
parse_number("$1.123.456,00", locale = es_MX)
# Invalid values are replaced with missing values with a warning.
x <- c("1", "2", "3", "-")
parse_double(x)
# Or flag values as missing
parse_double(x, na = "-")
}
\seealso{
Other parsers:
\code{\link{col_skip}()},
\code{\link{cols_condense}()},
\code{\link{cols}()},
\code{\link{parse_datetime}()},
\code{\link{parse_factor}()},
\code{\link{parse_guess}()},
\code{\link{parse_number}()},
\code{\link{parse_vector}()}
}
\concept{parsers}
readr/man/spec_delim.Rd 0000644 0001762 0000144 00000026106 14510343737 014531 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_delim.R, R/read_table.R
\name{spec_delim}
\alias{spec_delim}
\alias{spec_csv}
\alias{spec_csv2}
\alias{spec_tsv}
\alias{spec_table}
\title{Generate a column specification}
\usage{
spec_delim(
file,
delim = NULL,
quote = "\\"",
escape_backslash = FALSE,
escape_double = TRUE,
col_names = TRUE,
col_types = list(),
col_select = NULL,
id = NULL,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
comment = "",
trim_ws = FALSE,
skip = 0,
n_max = 0,
guess_max = 1000,
name_repair = "unique",
num_threads = readr_threads(),
progress = show_progress(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE,
lazy = should_read_lazy()
)
spec_csv(
file,
col_names = TRUE,
col_types = list(),
col_select = NULL,
id = NULL,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = 0,
guess_max = 1000,
name_repair = "unique",
num_threads = readr_threads(),
progress = show_progress(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE,
lazy = should_read_lazy()
)
spec_csv2(
file,
col_names = TRUE,
col_types = list(),
col_select = NULL,
id = NULL,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = 0,
guess_max = 1000,
progress = show_progress(),
name_repair = "unique",
num_threads = readr_threads(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE,
lazy = should_read_lazy()
)
spec_tsv(
file,
col_names = TRUE,
col_types = list(),
col_select = NULL,
id = NULL,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = 0,
guess_max = 1000,
progress = show_progress(),
name_repair = "unique",
num_threads = readr_threads(),
show_col_types = should_show_types(),
skip_empty_rows = TRUE,
lazy = should_read_lazy()
)
spec_table(
file,
col_names = TRUE,
col_types = list(),
locale = default_locale(),
na = "NA",
skip = 0,
n_max = 0,
guess_max = 1000,
progress = show_progress(),
comment = "",
show_col_types = should_show_types(),
skip_empty_rows = TRUE
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{delim}{Single character used to separate fields within a record.}
\item{quote}{Single character used to quote strings.}
\item{escape_backslash}{Does the file use backslashes to escape special
characters? This is more general than \code{escape_double} as backslashes
can be used to escape the delimiter character, the quote character, or
to add special characters like \verb{\\\\n}.}
\item{escape_double}{Does the file escape quotes by doubling them?
i.e. If this option is \code{TRUE}, the value \verb{""""} represents
a single quote, \verb{\\"}.}
\item{col_names}{Either \code{TRUE}, \code{FALSE} or a character vector
of column names.
If \code{TRUE}, the first row of the input will be used as the column
names, and will not be included in the data frame. If \code{FALSE}, column
names will be generated automatically: X1, X2, X3 etc.
If \code{col_names} is a character vector, the values will be used as the
names of the columns, and the first row of the input will be read into
the first row of the output data frame.
Missing (\code{NA}) column names will generate a warning, and be filled
in with dummy names \code{...1}, \code{...2} etc. Duplicate column names
will generate a warning and be made unique, see \code{name_repair} to control
how this is done.}
\item{col_types}{One of \code{NULL}, a \code{\link[=cols]{cols()}} specification, or
a string. See \code{vignette("readr")} for more details.
If \code{NULL}, all column types will be inferred from \code{guess_max} rows of the
input, interspersed throughout the file. This is convenient (and fast),
but not robust. If the guessed types are wrong, you'll need to increase
\code{guess_max} or supply the correct types yourself.
Column specifications created by \code{\link[=list]{list()}} or \code{\link[=cols]{cols()}} must contain
one column specification for each column. If you only want to read a
subset of the columns, use \code{\link[=cols_only]{cols_only()}}.
Alternatively, you can use a compact string representation where each
character represents one column:
\itemize{
\item c = character
\item i = integer
\item n = number
\item d = double
\item l = logical
\item f = factor
\item D = date
\item T = date time
\item t = time
\item ? = guess
\item _ or - = skip
}
By default, reading a file without a column specification will print a
message showing what \code{readr} guessed they were. To remove this message,
set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.}
\item{col_select}{Columns to include in the results. You can use the same
mini-language as \code{dplyr::select()} to refer to the columns by name. Use
\code{c()} to use more than one selection expression. Although this
usage is less common, \code{col_select} also accepts a numeric column index. See
\code{\link[tidyselect:language]{?tidyselect::language}} for full details on the
selection language.}
\item{id}{The name of a column in which to store the file path. This is
useful when reading multiple input files and there is data in the file
paths, such as the data collection date. If \code{NULL} (the default) no extra
column is created.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{quoted_na}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Should missing values
inside quotes be treated as missing values (the default) or strings. This
parameter is soft deprecated as of readr 2.0.0.}
\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
\item{skip}{Number of lines to skip before reading data. If \code{comment} is
supplied any commented lines are ignored \emph{after} skipping.}
\item{n_max}{Maximum number of lines to read.}
\item{guess_max}{Maximum number of lines to use for guessing column types.
Will never use more than the number of lines read.
See \code{vignette("column-types", package = "readr")} for more details.}
\item{name_repair}{Handling of column names. The default behaviour is to
ensure column names are \code{"unique"}. Various repair strategies are
supported:
\itemize{
\item \code{"minimal"}: No name repair or checks, beyond basic existence of names.
\item \code{"unique"} (default value): Make sure names are unique and not empty.
\item \code{"check_unique"}: No name repair, but check they are \code{unique}.
\item \code{"unique_quiet"}: Repair with the \code{unique} strategy, quietly.
\item \code{"universal"}: Make the names \code{unique} and syntactic.
\item \code{"universal_quiet"}: Repair with the \code{universal} strategy, quietly.
\item A function: Apply custom name repair (e.g., \code{name_repair = make.names}
for names in the style of base R).
\item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}.
}
This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
See there for more details on these terms and the strategies used
to enforce them.}
\item{num_threads}{The number of processing threads to use for initial
parsing and lazy reading of data. If your data contains newlines within
fields the parser should automatically detect this and fall back to using
one thread only. However if you know your file has newlines within quoted
fields it is safest to set \code{num_threads = 1} explicitly.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
\item{show_col_types}{If \code{FALSE}, do not show the guessed column types. If
\code{TRUE} always show the column types, even if they are supplied. If \code{NULL}
(the default) only show the column types if they are not explicitly supplied
by the \code{col_types} argument.}
\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}
\item{lazy}{Read values lazily? By default, this is \code{FALSE}, because there
are special considerations when reading a file lazily that have tripped up
some users. Specifically, things get tricky when reading and then writing
back into the same file. But, in general, lazy reading (\code{lazy = TRUE}) has
many benefits, especially for interactive use and when your downstream work
only involves a subset of the rows or columns.
Learn more in \code{\link[=should_read_lazy]{should_read_lazy()}} and in the documentation for the
\code{altrep} argument of \code{\link[vroom:vroom]{vroom::vroom()}}.}
}
\value{
The \code{col_spec} generated for the file.
}
\description{
When printed, only the first 20 columns are printed by default. To override
this, set \code{options(readr.num_columns)} to modify the number shown (a
value of 0 turns off printing).
}
\examples{
# Input sources -------------------------------------------------------------
# Retrieve specs from a path
spec_csv(system.file("extdata/mtcars.csv", package = "readr"))
spec_csv(system.file("extdata/mtcars.csv.zip", package = "readr"))
# Or directly from a string (must contain a newline)
spec_csv(I("x,y\n1,2\n3,4"))
# Column types --------------------------------------------------------------
# By default, readr guesses the columns types, looking at 1000 rows
# throughout the file.
# You can specify the number of rows used with guess_max.
spec_csv(system.file("extdata/mtcars.csv", package = "readr"), guess_max = 20)
}
readr/man/read_lines_chunked.Rd 0000644 0001762 0000144 00000004453 14174704674 016243 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_lines_chunked.R
\name{read_lines_chunked}
\alias{read_lines_chunked}
\alias{read_lines_raw_chunked}
\title{Read lines from a file or string by chunk.}
\usage{
read_lines_chunked(
file,
callback,
chunk_size = 10000,
skip = 0,
locale = default_locale(),
na = character(),
progress = show_progress()
)
read_lines_raw_chunked(
file,
callback,
chunk_size = 10000,
skip = 0,
progress = show_progress()
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{callback}{A callback function to call on each chunk}
\item{chunk_size}{The number of rows to include in each chunk}
\item{skip}{Number of lines to skip before reading data.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
}
\description{
Read lines from a file or string by chunk.
}
\seealso{
Other chunked:
\code{\link{callback}},
\code{\link{melt_delim_chunked}()},
\code{\link{read_delim_chunked}()}
}
\concept{chunked}
\keyword{internal}
readr/man/parse_guess.Rd 0000644 0001762 0000144 00000004107 14174704674 014751 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collectors.R
\name{parse_guess}
\alias{parse_guess}
\alias{col_guess}
\alias{guess_parser}
\title{Parse using the "best" type}
\usage{
parse_guess(
x,
na = c("", "NA"),
locale = default_locale(),
trim_ws = TRUE,
guess_integer = FALSE
)
col_guess()
guess_parser(
x,
locale = default_locale(),
guess_integer = FALSE,
na = c("", "NA")
)
}
\arguments{
\item{x}{Character vector of values to parse.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
\item{guess_integer}{If \code{TRUE}, guess integer types for whole numbers, if
\code{FALSE} guess numeric type for all numbers.}
}
\description{
\code{parse_guess()} returns the parser vector; \code{guess_parser()}
returns the name of the parser. These functions use a number of heuristics
to determine which type of vector is "best". Generally they try to err on
the side of safety, as it's straightforward to override the parsing choice
if needed.
}
\examples{
# Logical vectors
parse_guess(c("FALSE", "TRUE", "F", "T"))
# Integers and doubles
parse_guess(c("1", "2", "3"))
parse_guess(c("1.6", "2.6", "3.4"))
# Numbers containing grouping mark
guess_parser("1,234,566")
parse_guess("1,234,566")
# ISO 8601 date times
guess_parser(c("2010-10-10"))
parse_guess(c("2010-10-10"))
}
\seealso{
Other parsers:
\code{\link{col_skip}()},
\code{\link{cols_condense}()},
\code{\link{cols}()},
\code{\link{parse_datetime}()},
\code{\link{parse_factor}()},
\code{\link{parse_logical}()},
\code{\link{parse_number}()},
\code{\link{parse_vector}()}
}
\concept{parsers}
readr/man/format_delim.Rd 0000644 0001762 0000144 00000011112 14304131311 015036 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/write.R
\name{format_delim}
\alias{format_delim}
\alias{format_csv}
\alias{format_csv2}
\alias{format_tsv}
\title{Convert a data frame to a delimited string}
\usage{
format_delim(
x,
delim,
na = "NA",
append = FALSE,
col_names = !append,
quote = c("needed", "all", "none"),
escape = c("double", "backslash", "none"),
eol = "\\n",
quote_escape = deprecated()
)
format_csv(
x,
na = "NA",
append = FALSE,
col_names = !append,
quote = c("needed", "all", "none"),
escape = c("double", "backslash", "none"),
eol = "\\n",
quote_escape = deprecated()
)
format_csv2(
x,
na = "NA",
append = FALSE,
col_names = !append,
quote = c("needed", "all", "none"),
escape = c("double", "backslash", "none"),
eol = "\\n",
quote_escape = deprecated()
)
format_tsv(
x,
na = "NA",
append = FALSE,
col_names = !append,
quote = c("needed", "all", "none"),
escape = c("double", "backslash", "none"),
eol = "\\n",
quote_escape = deprecated()
)
}
\arguments{
\item{x}{A data frame.}
\item{delim}{Delimiter used to separate values. Defaults to \code{" "} for \code{write_delim()}, \code{","} for \code{write_excel_csv()} and
\code{";"} for \code{write_excel_csv2()}. Must be a single character.}
\item{na}{String used for missing values. Defaults to NA. Missing values
will never be quoted; strings with the same value as \code{na} will
always be quoted.}
\item{append}{If \code{FALSE}, will overwrite existing file. If \code{TRUE},
will append to existing file. In both cases, if the file does not exist a new
file is created.}
\item{col_names}{If \code{FALSE}, column names will not be included at the top of the file. If \code{TRUE},
column names will be included. If not specified, \code{col_names} will take the opposite value given to \code{append}.}
\item{quote}{How to handle fields which contain characters that need to be
quoted.
\itemize{
\item \code{needed} - Values are only quoted if needed: if they contain a delimiter,
quote, or newline.
\item \code{all} - Quote all fields.
\item \code{none} - Never quote fields.
}}
\item{escape}{The type of escape to use when quotes are in the data.
\itemize{
\item \code{double} - quotes are escaped by doubling them.
\item \code{backslash} - quotes are escaped by a preceding backslash.
\item \code{none} - quotes are not escaped.
}}
\item{eol}{The end of line character to use. Most commonly either \code{"\\n"} for
Unix style newlines, or \code{"\\r\\n"} for Windows style newlines.}
\item{quote_escape}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Use the \code{escape}
argument instead.}
}
\value{
A string.
}
\description{
These functions are equivalent to \code{\link[=write_csv]{write_csv()}} etc., but instead
of writing to disk, they return a string.
}
\section{Output}{
Factors are coerced to character. Doubles are formatted to a decimal string
using the grisu3 algorithm. \code{POSIXct} values are formatted as ISO8601 with a
UTC timezone. \emph{Note: \code{POSIXct} objects in local or non-UTC timezones will be
converted to UTC time before writing.}
All columns are encoded as UTF-8. \code{write_excel_csv()} and \code{write_excel_csv2()} also include a
\href{https://en.wikipedia.org/wiki/Byte_order_mark}{UTF-8 Byte order mark}
which indicates to Excel the csv is UTF-8 encoded.
\code{write_excel_csv2()} and \code{write_csv2()} were created to allow users with
different locale settings to save .csv files using their default settings
(e.g. \verb{;} as the column separator and \verb{,} as the decimal separator).
This is common in some European countries.
Values are only quoted if they contain a comma, quote or newline.
The \verb{write_*()} functions will automatically compress outputs if an appropriate extension is given.
Three extensions are currently supported: \code{.gz} for gzip compression, \code{.bz2} for bzip2 compression and
\code{.xz} for lzma compression. See the examples for more information.
}
\examples{
# format_()* functions are useful for testing and reprexes
cat(format_csv(mtcars))
cat(format_tsv(mtcars))
cat(format_delim(mtcars, ";"))
# Specifying missing values
df <- data.frame(x = c(1, NA, 3))
format_csv(df, na = "missing")
# Quotes are automatically added as needed
df <- data.frame(x = c("a ", '"', ",", "\n"))
cat(format_csv(df))
}
\references{
Florian Loitsch, Printing Floating-Point Numbers Quickly and
Accurately with Integers, PLDI '10,
\url{http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf}
}
readr/man/write_delim.Rd 0000644 0001762 0000144 00000015423 14304131311 014711 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/write.R
\name{write_delim}
\alias{write_delim}
\alias{write_csv}
\alias{write_csv2}
\alias{write_excel_csv}
\alias{write_excel_csv2}
\alias{write_tsv}
\title{Write a data frame to a delimited file}
\usage{
write_delim(
x,
file,
delim = " ",
na = "NA",
append = FALSE,
col_names = !append,
quote = c("needed", "all", "none"),
escape = c("double", "backslash", "none"),
eol = "\\n",
num_threads = readr_threads(),
progress = show_progress(),
path = deprecated(),
quote_escape = deprecated()
)
write_csv(
x,
file,
na = "NA",
append = FALSE,
col_names = !append,
quote = c("needed", "all", "none"),
escape = c("double", "backslash", "none"),
eol = "\\n",
num_threads = readr_threads(),
progress = show_progress(),
path = deprecated(),
quote_escape = deprecated()
)
write_csv2(
x,
file,
na = "NA",
append = FALSE,
col_names = !append,
quote = c("needed", "all", "none"),
escape = c("double", "backslash", "none"),
eol = "\\n",
num_threads = readr_threads(),
progress = show_progress(),
path = deprecated(),
quote_escape = deprecated()
)
write_excel_csv(
x,
file,
na = "NA",
append = FALSE,
col_names = !append,
delim = ",",
quote = "all",
escape = c("double", "backslash", "none"),
eol = "\\n",
num_threads = readr_threads(),
progress = show_progress(),
path = deprecated(),
quote_escape = deprecated()
)
write_excel_csv2(
x,
file,
na = "NA",
append = FALSE,
col_names = !append,
delim = ";",
quote = "all",
escape = c("double", "backslash", "none"),
eol = "\\n",
num_threads = readr_threads(),
progress = show_progress(),
path = deprecated(),
quote_escape = deprecated()
)
write_tsv(
x,
file,
na = "NA",
append = FALSE,
col_names = !append,
quote = "none",
escape = c("double", "backslash", "none"),
eol = "\\n",
num_threads = readr_threads(),
progress = show_progress(),
path = deprecated(),
quote_escape = deprecated()
)
}
\arguments{
\item{x}{A data frame or tibble to write to disk.}
\item{file}{File or connection to write to.}
\item{delim}{Delimiter used to separate values. Defaults to \code{" "} for \code{write_delim()}, \code{","} for \code{write_excel_csv()} and
\code{";"} for \code{write_excel_csv2()}. Must be a single character.}
\item{na}{String used for missing values. Defaults to NA. Missing values
will never be quoted; strings with the same value as \code{na} will
always be quoted.}
\item{append}{If \code{FALSE}, will overwrite existing file. If \code{TRUE},
will append to existing file. In both cases, if the file does not exist a new
file is created.}
\item{col_names}{If \code{FALSE}, column names will not be included at the top of the file. If \code{TRUE},
column names will be included. If not specified, \code{col_names} will take the opposite value given to \code{append}.}
\item{quote}{How to handle fields which contain characters that need to be
quoted.
\itemize{
\item \code{needed} - Values are only quoted if needed: if they contain a delimiter,
quote, or newline.
\item \code{all} - Quote all fields.
\item \code{none} - Never quote fields.
}}
\item{escape}{The type of escape to use when quotes are in the data.
\itemize{
\item \code{double} - quotes are escaped by doubling them.
\item \code{backslash} - quotes are escaped by a preceding backslash.
\item \code{none} - quotes are not escaped.
}}
\item{eol}{The end of line character to use. Most commonly either \code{"\\n"} for
Unix style newlines, or \code{"\\r\\n"} for Windows style newlines.}
\item{num_threads}{Number of threads to use when reading and materializing
vectors. If your data contains newlines within fields the parser will
automatically be forced to use a single thread only.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The display
is updated every 50,000 values and will only display if estimated reading
time is 5 seconds or more. The automatic progress bar can be disabled by
setting option \code{readr.show_progress} to \code{FALSE}.}
\item{path}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Use the \code{file} argument
instead.}
\item{quote_escape}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Use the \code{escape}
argument instead.}
}
\value{
\verb{write_*()} returns the input \code{x} invisibly.
}
\description{
The \verb{write_*()} family of functions are an improvement to analogous functions such
as \code{\link[=write.csv]{write.csv()}} because they are approximately twice as fast. Unlike \code{\link[=write.csv]{write.csv()}},
these functions do not include row names as a column in the written file.
A generic function, \code{output_column()}, is applied to each variable
to coerce columns to suitable output.
}
\section{Output}{
Factors are coerced to character. Doubles are formatted to a decimal string
using the grisu3 algorithm. \code{POSIXct} values are formatted as ISO8601 with a
UTC timezone. \emph{Note: \code{POSIXct} objects in local or non-UTC timezones will be
converted to UTC time before writing.}
All columns are encoded as UTF-8. \code{write_excel_csv()} and \code{write_excel_csv2()} also include a
\href{https://en.wikipedia.org/wiki/Byte_order_mark}{UTF-8 Byte order mark}
which indicates to Excel the csv is UTF-8 encoded.
\code{write_excel_csv2()} and \code{write_csv2()} were created to allow users with
different locale settings to save .csv files using their default settings
(e.g. \verb{;} as the column separator and \verb{,} as the decimal separator).
This is common in some European countries.
Values are only quoted if they contain a comma, quote or newline.
The \verb{write_*()} functions will automatically compress outputs if an appropriate extension is given.
Three extensions are currently supported: \code{.gz} for gzip compression, \code{.bz2} for bzip2 compression and
\code{.xz} for lzma compression. See the examples for more information.
}
\examples{
\dontshow{
.old_wd <- setwd(tempdir())
}
# If only a file name is specified, write_()* will write
# the file to the current working directory.
write_csv(mtcars, "mtcars.csv")
write_tsv(mtcars, "mtcars.tsv")
# If you add an extension to the file name, write_()* will
# automatically compress the output.
write_tsv(mtcars, "mtcars.tsv.gz")
write_tsv(mtcars, "mtcars.tsv.bz2")
write_tsv(mtcars, "mtcars.tsv.xz")
\dontshow{
setwd(.old_wd)
}
}
\references{
Florian Loitsch, Printing Floating-Point Numbers Quickly and
Accurately with Integers, PLDI '10,
\url{http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf}
}
readr/man/problems.Rd 0000644 0001762 0000144 00000002151 14462256076 014247 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/problems.R
\name{problems}
\alias{problems}
\alias{stop_for_problems}
\title{Retrieve parsing problems}
\usage{
problems(x = .Last.value)
stop_for_problems(x)
}
\arguments{
\item{x}{A data frame (from \verb{read_*()}) or a vector (from \verb{parse_*()}).}
}
\value{
A data frame with one row for each problem and four columns:
\item{row,col}{Row and column of problem}
\item{expected}{What readr expected to find}
\item{actual}{What it actually got}
}
\description{
Readr functions will only throw an error if parsing fails in an unrecoverable
way. However, there are lots of potential problems that you might want to
know about - these are stored in the \code{problems} attribute of the
output, which you can easily access with this function.
\code{stop_for_problems()} will throw an error if there are any parsing
problems: this is useful for automated scripts where you want to throw
an error as soon as you encounter a problem.
}
\examples{
x <- parse_integer(c("1X", "blah", "3"))
problems(x)
y <- parse_integer(c("1", "2", "3"))
problems(y)
}
readr/man/melt_delim.Rd 0000644 0001762 0000144 00000014566 14304131171 014533 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/melt_delim.R
\name{melt_delim}
\alias{melt_delim}
\alias{melt_csv}
\alias{melt_csv2}
\alias{melt_tsv}
\title{Return melted data for each token in a delimited file (including csv & tsv)}
\usage{
melt_delim(
file,
delim,
quote = "\\"",
escape_backslash = FALSE,
escape_double = TRUE,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
comment = "",
trim_ws = FALSE,
skip = 0,
n_max = Inf,
progress = show_progress(),
skip_empty_rows = FALSE
)
melt_csv(
file,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = Inf,
progress = show_progress(),
skip_empty_rows = FALSE
)
melt_csv2(
file,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = Inf,
progress = show_progress(),
skip_empty_rows = FALSE
)
melt_tsv(
file,
locale = default_locale(),
na = c("", "NA"),
quoted_na = TRUE,
quote = "\\"",
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = Inf,
progress = show_progress(),
skip_empty_rows = FALSE
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{delim}{Single character used to separate fields within a record.}
\item{quote}{Single character used to quote strings.}
\item{escape_backslash}{Does the file use backslashes to escape special
characters? This is more general than \code{escape_double} as backslashes
can be used to escape the delimiter character, the quote character, or
to add special characters like \verb{\\\\n}.}
\item{escape_double}{Does the file escape quotes by doubling them?
i.e. If this option is \code{TRUE}, the value \verb{""""} represents
a single quote, \verb{\\"}.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{quoted_na}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Should missing values
inside quotes be treated as missing values (the default) or strings. This
parameter is soft deprecated as of readr 2.0.0.}
\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
\item{skip}{Number of lines to skip before reading data. If \code{comment} is
supplied any commented lines are ignored \emph{after} skipping.}
\item{n_max}{Maximum number of lines to read.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}
}
\value{
A \code{\link[=tibble]{tibble()}} of four columns:
\itemize{
\item \code{row}, the row that the token comes from in the original file
\item \code{col}, the column that the token comes from in the original file
\item \code{data_type}, the data type of the token, e.g. \code{"integer"}, \code{"character"},
\code{"date"}, guessed in a similar way to the \code{guess_parser()} function.
\item \code{value}, the token itself as a character string, unchanged from its
representation in the original file.
}
If there are parsing problems, a warning tells you
how many, and you can retrieve the details with \code{\link[=problems]{problems()}}.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
This function has been superseded in readr and moved to \href{https://r-lib.github.io/meltr/}{the meltr package}.
}
\details{
For certain non-rectangular data formats, it can be useful to parse the data
into a melted format where each row represents a single token.
\code{melt_csv()} and \code{melt_tsv()} are special cases of the general
\code{melt_delim()}. They're useful for reading the most common types of
flat file data, comma separated values and tab separated values,
respectively. \code{melt_csv2()} uses \verb{;} for the field separator and \verb{,} for the
decimal point. This is common in some European countries.
}
\examples{
# Input sources -------------------------------------------------------------
# Read from a path
melt_csv(readr_example("mtcars.csv"))
melt_csv(readr_example("mtcars.csv.zip"))
melt_csv(readr_example("mtcars.csv.bz2"))
\dontrun{
melt_csv("https://github.com/tidyverse/readr/raw/main/inst/extdata/mtcars.csv")
}
# Or directly from a string (must contain a newline)
melt_csv("x,y\n1,2\n3,4")
# To import empty cells as 'empty' rather than `NA`
melt_csv("x,y\n,NA,\"\",''", na = "NA")
# File types ----------------------------------------------------------------
melt_csv("a,b\n1.0,2.0")
melt_csv2("a;b\n1,0;2,0")
melt_tsv("a\tb\n1.0\t2.0")
melt_delim("a|b\n1.0|2.0", delim = "|")
}
\seealso{
\code{\link[=read_delim]{read_delim()}} for the conventional way to read rectangular data
from delimited files.
}
readr/man/read_builtin.Rd 0000644 0001762 0000144 00000001340 14174704674 015066 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_builtin.R
\name{read_builtin}
\alias{read_builtin}
\title{Read built-in object from package}
\usage{
read_builtin(x, package = NULL)
}
\arguments{
\item{x}{Name (character string) of data set to read.}
\item{package}{Name of package from which to find data set. By default, all
attached packages are searched and then the 'data' subdirectory (if present)
of the current working directory.}
}
\value{
An object of the built-in class of \code{x}.
}
\description{
Consistent wrapper around \code{\link[=data]{data()}} that forces the promise. This is also a
stronger parallel to loading data from a file.
}
\examples{
read_builtin("mtcars", "datasets")
}
readr/man/date_names.Rd 0000644 0001762 0000144 00000002125 14174704674 014527 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/date-symbols.R
\name{date_names}
\alias{date_names}
\alias{date_names_lang}
\alias{date_names_langs}
\title{Create or retrieve date names}
\usage{
date_names(mon, mon_ab = mon, day, day_ab = day, am_pm = c("AM", "PM"))
date_names_lang(language)
date_names_langs()
}
\arguments{
\item{mon, mon_ab}{Full and abbreviated month names.}
\item{day, day_ab}{Full and abbreviated week day names. Starts with Sunday.}
\item{am_pm}{Names used for AM and PM.}
\item{language}{A BCP 47 locale, made up of a language and a region,
e.g. \code{"en"} for American English. See \code{date_names_langs()}
for a complete list of available locales.}
}
\description{
When parsing dates, you often need to know how days of the week and
months are represented as text. This pair of functions allows you to either
create your own, or retrieve from a standard list. The standard list is
derived from ICU (\verb{http://site.icu-project.org}) via the stringi package.
}
\examples{
date_names_lang("en")
date_names_lang("ko")
date_names_lang("fr")
}
readr/man/parse_number.Rd 0000644 0001762 0000144 00000004152 14315646511 015103 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collectors.R
\name{parse_number}
\alias{parse_number}
\alias{col_number}
\title{Parse numbers, flexibly}
\usage{
parse_number(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE)
col_number()
}
\arguments{
\item{x}{Character vector of values to parse.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
}
\value{
A numeric vector (double) of parsed numbers.
}
\description{
This parses the first number it finds, dropping any non-numeric characters
before the first number and all characters after the first number. The
grouping mark specified by the locale is ignored inside the number.
}
\examples{
## These all return 1000
parse_number("$1,000") ## leading `$` and grouping character `,` ignored
parse_number("euro1,000") ## leading non-numeric euro ignored
parse_number("t1000t1000") ## only parses first number found
parse_number("1,234.56")
## explicit locale specifying European grouping and decimal marks
parse_number("1.234,56", locale = locale(decimal_mark = ",", grouping_mark = "."))
## SI/ISO 31-0 standard spaces for number grouping
parse_number("1 234.56", locale = locale(decimal_mark = ".", grouping_mark = " "))
## Specifying strings for NAs
parse_number(c("1", "2", "3", "NA"))
parse_number(c("1", "2", "3", "NA", "Nothing"), na = c("NA", "Nothing"))
}
\seealso{
Other parsers:
\code{\link{col_skip}()},
\code{\link{cols_condense}()},
\code{\link{cols}()},
\code{\link{parse_datetime}()},
\code{\link{parse_factor}()},
\code{\link{parse_guess}()},
\code{\link{parse_logical}()},
\code{\link{parse_vector}()}
}
\concept{parsers}
readr/man/read_table2.Rd 0000644 0001762 0000144 00000001540 14174704674 014573 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_table.R
\name{read_table2}
\alias{read_table2}
\title{Read whitespace-separated columns into a tibble}
\usage{
read_table2(
file,
col_names = TRUE,
col_types = NULL,
locale = default_locale(),
na = "NA",
skip = 0,
n_max = Inf,
guess_max = min(n_max, 1000),
progress = show_progress(),
comment = "",
skip_empty_rows = TRUE
)
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
This function is deprecated because we renamed it to \code{\link[=read_table]{read_table()}} and
removed the old \code{read_table} function, which was too strict for most cases
and was analogous to just using \code{read_fwf()}.
}
\keyword{internal}
readr/man/melt_fwf.Rd 0000644 0001762 0000144 00000010426 14174704674 014235 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/melt_fwf.R
\name{melt_fwf}
\alias{melt_fwf}
\title{Return melted data for each token in a fixed width file}
\usage{
melt_fwf(
file,
col_positions,
locale = default_locale(),
na = c("", "NA"),
comment = "",
trim_ws = TRUE,
skip = 0,
n_max = Inf,
progress = show_progress(),
skip_empty_rows = FALSE
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{col_positions}{Column positions, as created by \code{\link[=fwf_empty]{fwf_empty()}},
\code{\link[=fwf_widths]{fwf_widths()}} or \code{\link[=fwf_positions]{fwf_positions()}}. To read in only selected fields,
use \code{\link[=fwf_positions]{fwf_positions()}}. If the width of the last column is variable (a
ragged fwf file), supply the last end position as NA.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
\item{skip}{Number of lines to skip before reading data.}
\item{n_max}{Maximum number of lines to read.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
This function has been superseded in readr and moved to \href{https://r-lib.github.io/meltr/}{the meltr package}.
}
\details{
For certain non-rectangular data formats, it can be useful to parse the data
into a melted format where each row represents a single token.
\code{melt_fwf()} parses each token of a fixed width file into a single row, but
it still requires that each field is in the same position in every row of the
source file.
}
\examples{
fwf_sample <- readr_example("fwf-sample.txt")
cat(read_lines(fwf_sample))
# You can specify column positions in several ways:
# 1. Guess based on position of empty columns
melt_fwf(fwf_sample, fwf_empty(fwf_sample, col_names = c("first", "last", "state", "ssn")))
# 2. A vector of field widths
melt_fwf(fwf_sample, fwf_widths(c(20, 10, 12), c("name", "state", "ssn")))
# 3. Paired vectors of start and end positions
melt_fwf(fwf_sample, fwf_positions(c(1, 30), c(10, 42), c("name", "ssn")))
# 4. Named arguments with start and end positions
melt_fwf(fwf_sample, fwf_cols(name = c(1, 10), ssn = c(30, 42)))
# 5. Named arguments with column widths
melt_fwf(fwf_sample, fwf_cols(name = 20, state = 10, ssn = 12))
}
\seealso{
\code{\link[=melt_table]{melt_table()}} to melt fixed width files where each
column is separated by whitespace, and \code{\link[=read_fwf]{read_fwf()}} for the conventional
way to read rectangular data from fixed width files.
}
readr/man/count_fields.Rd 0000644 0001762 0000144 00000002742 14174704674 015112 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/count_fields.R
\name{count_fields}
\alias{count_fields}
\title{Count the number of fields in each line of a file}
\usage{
count_fields(file, tokenizer, skip = 0, n_max = -1L)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{tokenizer}{A tokenizer that specifies how to break the \code{file}
up into fields, e.g., \code{\link[=tokenizer_csv]{tokenizer_csv()}},
\code{\link[=tokenizer_fwf]{tokenizer_fwf()}}}
\item{skip}{Number of lines to skip before reading data.}
\item{n_max}{Optionally, maximum number of rows to count fields for.}
}
\description{
This is useful for diagnosing problems with functions that fail
to parse correctly.
}
\examples{
count_fields(readr_example("mtcars.csv"), tokenizer_csv())
}
readr/man/read_log.Rd 0000644 0001762 0000144 00000010101 14510343737 014165 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_log.R
\name{read_log}
\alias{read_log}
\title{Read common/combined log file into a tibble}
\usage{
read_log(
file,
col_names = FALSE,
col_types = NULL,
trim_ws = TRUE,
skip = 0,
n_max = Inf,
show_col_types = should_show_types(),
progress = show_progress()
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{col_names}{Either \code{TRUE}, \code{FALSE} or a character vector
of column names.
If \code{TRUE}, the first row of the input will be used as the column
names, and will not be included in the data frame. If \code{FALSE}, column
names will be generated automatically: X1, X2, X3 etc.
If \code{col_names} is a character vector, the values will be used as the
names of the columns, and the first row of the input will be read into
the first row of the output data frame.
Missing (\code{NA}) column names will generate a warning, and be filled
in with dummy names \code{...1}, \code{...2} etc. Duplicate column names
will generate a warning and be made unique, see \code{name_repair} to control
how this is done.}
\item{col_types}{One of \code{NULL}, a \code{\link[=cols]{cols()}} specification, or
a string. See \code{vignette("readr")} for more details.
If \code{NULL}, all column types will be inferred from \code{guess_max} rows of the
input, interspersed throughout the file. This is convenient (and fast),
but not robust. If the guessed types are wrong, you'll need to increase
\code{guess_max} or supply the correct types yourself.
Column specifications created by \code{\link[=list]{list()}} or \code{\link[=cols]{cols()}} must contain
one column specification for each column. If you only want to read a
subset of the columns, use \code{\link[=cols_only]{cols_only()}}.
Alternatively, you can use a compact string representation where each
character represents one column:
\itemize{
\item c = character
\item i = integer
\item n = number
\item d = double
\item l = logical
\item f = factor
\item D = date
\item T = date time
\item t = time
\item ? = guess
\item _ or - = skip
}
By default, reading a file without a column specification will print a
message showing what \code{readr} guessed they were. To remove this message,
set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
\item{skip}{Number of lines to skip before reading data. If \code{comment} is
supplied any commented lines are ignored \emph{after} skipping.}
\item{n_max}{Maximum number of lines to read.}
\item{show_col_types}{If \code{FALSE}, do not show the guessed column types. If
\code{TRUE} always show the column types, even if they are supplied. If \code{NULL}
(the default) only show the column types if they are not explicitly supplied
by the \code{col_types} argument.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
}
\description{
This is a fairly standard format for log files - it uses both quotes
and square brackets for quoting, and there may be literal quotes embedded
in a quoted string. The dash, "-", is used for missing values.
}
\examples{
read_log(readr_example("example.log"))
}
readr/man/read_rds.Rd 0000644 0001762 0000144 00000003324 14304131171 014171 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rds.R
\name{read_rds}
\alias{read_rds}
\alias{write_rds}
\title{Read/write RDS files.}
\usage{
read_rds(file, refhook = NULL)
write_rds(
x,
file,
compress = c("none", "gz", "bz2", "xz"),
version = 2,
refhook = NULL,
text = FALSE,
path = deprecated(),
...
)
}
\arguments{
\item{file}{The file path to read from/write to.}
\item{refhook}{A function to handle reference objects.}
\item{x}{R object to serialise and write.}
\item{compress}{Compression method to use: "none", "gz", "bz2", or "xz".}
\item{version}{Serialization format version to be used. The default value is 2
as it's compatible for R versions prior to 3.5.0. See \code{\link[base:readRDS]{base::saveRDS()}}
for more details.}
\item{text}{If \code{TRUE} a text representation is used, otherwise a binary representation is used.}
\item{path}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Use the \code{file} argument
instead.}
\item{...}{Additional arguments to connection function. For example, control
the space-time trade-off of different compression methods with
\code{compression}. See \code{\link[=connections]{connections()}} for more details.}
}
\value{
\code{write_rds()} returns \code{x}, invisibly.
}
\description{
Consistent wrapper around \code{\link[=saveRDS]{saveRDS()}} and \code{\link[=readRDS]{readRDS()}}.
\code{write_rds()} does not compress by default as space is generally cheaper
than time.
}
\examples{
temp <- tempfile()
write_rds(mtcars, temp)
read_rds(temp)
\dontrun{
write_rds(mtcars, "compressed_mtc.rds", "xz", compression = 9L)
}
}
readr/man/callback.Rd 0000644 0001762 0000144 00000003755 14315642631 014164 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callback.R
\name{callback}
\alias{callback}
\alias{ChunkCallback}
\alias{SideEffectChunkCallback}
\alias{DataFrameCallback}
\alias{ListCallback}
\alias{AccumulateCallback}
\title{Callback classes}
\description{
These classes are used to define callback behaviors.
}
\details{
\describe{
\item{ChunkCallback}{Callback interface definition, all callback functions should inherit from this class.}
\item{SideEffectChunkCallback}{Callback function that is used only for side effects, no results are returned.}
\item{DataFrameCallback}{Callback function that combines each result together at the end.}
\item{AccumulateCallback}{
Callback function that accumulates a single result. Requires the parameter \code{acc} to specify
the initial value of the accumulator. The parameter \code{acc} is \code{NULL} by default.
}
}
}
\examples{
## If given a regular function it is converted to a SideEffectChunkCallback
# view structure of each chunk
read_lines_chunked(readr_example("mtcars.csv"), str, chunk_size = 5)
# Print starting line of each chunk
f <- function(x, pos) print(pos)
read_lines_chunked(readr_example("mtcars.csv"), SideEffectChunkCallback$new(f), chunk_size = 5)
# If combined results are desired you can use the DataFrameCallback
# Cars with 3 gears
f <- function(x, pos) subset(x, gear == 3)
read_csv_chunked(readr_example("mtcars.csv"), DataFrameCallback$new(f), chunk_size = 5)
# The ListCallback can be used for more flexible output
f <- function(x, pos) x$mpg[x$hp > 100]
read_csv_chunked(readr_example("mtcars.csv"), ListCallback$new(f), chunk_size = 5)
# The AccumulateCallback accumulates results from each chunk
f <- function(x, pos, acc) sum(x$mpg) + acc
read_csv_chunked(readr_example("mtcars.csv"), AccumulateCallback$new(f, acc = 0), chunk_size = 5)
}
\seealso{
Other chunked:
\code{\link{melt_delim_chunked}()},
\code{\link{read_delim_chunked}()},
\code{\link{read_lines_chunked}()}
}
\concept{chunked}
\keyword{internal}
readr/man/datasource.Rd 0000644 0001762 0000144 00000003016 14174704674 014561 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/source.R
\name{datasource}
\alias{datasource}
\title{Create a source object.}
\usage{
datasource(
file,
skip = 0,
skip_empty_rows = FALSE,
comment = "",
skip_quote = TRUE
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{skip}{Number of lines to skip before reading data.}
}
\description{
Create a source object.
}
\examples{
# Literal csv
datasource("a,b,c\n1,2,3")
datasource(charToRaw("a,b,c\n1,2,3"))
# Strings
datasource(readr_example("mtcars.csv"))
datasource(readr_example("mtcars.csv.bz2"))
datasource(readr_example("mtcars.csv.zip"))
\dontrun{
datasource("https://github.com/tidyverse/readr/raw/main/inst/extdata/mtcars.csv")
}
# Connection
con <- rawConnection(charToRaw("abc\n123"))
datasource(con)
close(con)
}
\keyword{internal}
readr/man/read_table.Rd 0000644 0001762 0000144 00000012573 14510343737 014512 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_table.R
\name{read_table}
\alias{read_table}
\title{Read whitespace-separated columns into a tibble}
\usage{
read_table(
file,
col_names = TRUE,
col_types = NULL,
locale = default_locale(),
na = "NA",
skip = 0,
n_max = Inf,
guess_max = min(n_max, 1000),
progress = show_progress(),
comment = "",
show_col_types = should_show_types(),
skip_empty_rows = TRUE
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{col_names}{Either \code{TRUE}, \code{FALSE} or a character vector
of column names.
If \code{TRUE}, the first row of the input will be used as the column
names, and will not be included in the data frame. If \code{FALSE}, column
names will be generated automatically: X1, X2, X3 etc.
If \code{col_names} is a character vector, the values will be used as the
names of the columns, and the first row of the input will be read into
the first row of the output data frame.
Missing (\code{NA}) column names will generate a warning, and be filled
in with dummy names \code{...1}, \code{...2} etc. Duplicate column names
will generate a warning and be made unique, see \code{name_repair} to control
how this is done.}
\item{col_types}{One of \code{NULL}, a \code{\link[=cols]{cols()}} specification, or
a string. See \code{vignette("readr")} for more details.
If \code{NULL}, all column types will be inferred from \code{guess_max} rows of the
input, interspersed throughout the file. This is convenient (and fast),
but not robust. If the guessed types are wrong, you'll need to increase
\code{guess_max} or supply the correct types yourself.
Column specifications created by \code{\link[=list]{list()}} or \code{\link[=cols]{cols()}} must contain
one column specification for each column. If you only want to read a
subset of the columns, use \code{\link[=cols_only]{cols_only()}}.
Alternatively, you can use a compact string representation where each
character represents one column:
\itemize{
\item c = character
\item i = integer
\item n = number
\item d = double
\item l = logical
\item f = factor
\item D = date
\item T = date time
\item t = time
\item ? = guess
\item _ or - = skip
}
By default, reading a file without a column specification will print a
message showing what \code{readr} guessed they were. To remove this message,
set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{skip}{Number of lines to skip before reading data.}
\item{n_max}{Maximum number of lines to read.}
\item{guess_max}{Maximum number of lines to use for guessing column types.
Will never use more than the number of lines read.
See \code{vignette("column-types", package = "readr")} for more details.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}
\item{show_col_types}{If \code{FALSE}, do not show the guessed column types. If
\code{TRUE} always show the column types, even if they are supplied. If \code{NULL}
(the default) only show the column types if they are not explicitly supplied
by the \code{col_types} argument.}
\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}
}
\description{
\code{read_table()} is designed to read the type of textual
data where each column is separated by one (or more) columns of space.
\code{read_table()} is like \code{\link[=read.table]{read.table()}}; it allows any number of whitespace
characters between columns, and the lines can be of different lengths.
\code{spec_table()} returns the column specifications rather than a data frame.
}
\examples{
ws <- readr_example("whitespace-sample.txt")
writeLines(read_lines(ws))
read_table(ws)
}
\seealso{
\code{\link[=read_fwf]{read_fwf()}} to read fixed width files where each column
is not separated by whitespace. \code{read_fwf()} is also useful for reading
tabular data with non-standard formatting.
}
readr/man/spec.Rd 0000644 0001762 0000144 00000001725 14174357220 013355 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/col_types.R
\name{cols_condense}
\alias{cols_condense}
\alias{spec}
\title{Examine the column specifications for a data frame}
\usage{
cols_condense(x)
spec(x)
}
\arguments{
\item{x}{The data frame object to extract from}
}
\value{
A col_spec object.
}
\description{
\code{cols_condense()} takes a spec object and condenses its definition by setting
the default column type to the most frequent type and only listing columns
with a different type.
\code{spec()} extracts the full column specification from a tibble
created by readr.
}
\examples{
df <- read_csv(readr_example("mtcars.csv"))
s <- spec(df)
s
cols_condense(s)
}
\seealso{
Other parsers:
\code{\link{col_skip}()},
\code{\link{cols}()},
\code{\link{parse_datetime}()},
\code{\link{parse_factor}()},
\code{\link{parse_guess}()},
\code{\link{parse_logical}()},
\code{\link{parse_number}()},
\code{\link{parse_vector}()}
}
\concept{parsers}
readr/man/parse_factor.Rd 0000644 0001762 0000144 00000004755 14304131171 015067 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collectors.R
\name{parse_factor}
\alias{parse_factor}
\alias{col_factor}
\title{Parse factors}
\usage{
parse_factor(
x,
levels = NULL,
ordered = FALSE,
na = c("", "NA"),
locale = default_locale(),
include_na = TRUE,
trim_ws = TRUE
)
col_factor(levels = NULL, ordered = FALSE, include_na = FALSE)
}
\arguments{
\item{x}{Character vector of values to parse.}
\item{levels}{Character vector of the allowed levels. When \code{levels = NULL}
(the default), \code{levels} are discovered from the unique values of \code{x}, in
the order in which they appear in \code{x}.}
\item{ordered}{Is it an ordered factor?}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{include_na}{If \code{TRUE} and \code{x} contains at least one \code{NA}, then \code{NA}
is included in the levels of the constructed factor.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
}
\description{
\code{parse_factor()} is similar to \code{\link[=factor]{factor()}}, but generates a warning if
\code{levels} have been specified and some elements of \code{x} are not found in those
\code{levels}.
}
\examples{
# discover the levels from the data
parse_factor(c("a", "b"))
parse_factor(c("a", "b", "-99"))
parse_factor(c("a", "b", "-99"), na = c("", "NA", "-99"))
parse_factor(c("a", "b", "-99"), na = c("", "NA", "-99"), include_na = FALSE)
# provide the levels explicitly
parse_factor(c("a", "b"), levels = letters[1:5])
x <- c("cat", "dog", "caw")
animals <- c("cat", "dog", "cow")
# base::factor() silently converts elements that do not match any levels to
# NA
factor(x, levels = animals)
# parse_factor() generates same factor as base::factor() but throws a warning
# and reports problems
parse_factor(x, levels = animals)
}
\seealso{
Other parsers:
\code{\link{col_skip}()},
\code{\link{cols_condense}()},
\code{\link{cols}()},
\code{\link{parse_datetime}()},
\code{\link{parse_guess}()},
\code{\link{parse_logical}()},
\code{\link{parse_number}()},
\code{\link{parse_vector}()}
}
\concept{parsers}
readr/man/output_column.Rd 0000644 0001762 0000144 00000001047 14174357220 015335 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/write.R
\name{output_column}
\alias{output_column}
\title{Preprocess column for output}
\usage{
output_column(x, name)
}
\arguments{
\item{x}{A vector}
}
\description{
This is a generic function that applied to each column before it is saved
to disk. It provides a hook for S3 classes that need special handling.
}
\examples{
# Most columns are not altered, but POSIXct are converted to ISO8601.
x <- parse_datetime("2016-01-01")
str(output_column(x))
}
\keyword{internal}
readr/man/type_convert.Rd 0000644 0001762 0000144 00000004334 14174704674 015154 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/type_convert.R
\name{type_convert}
\alias{type_convert}
\title{Re-convert character columns in existing data frame}
\usage{
type_convert(
df,
col_types = NULL,
na = c("", "NA"),
trim_ws = TRUE,
locale = default_locale(),
guess_integer = FALSE
)
}
\arguments{
\item{df}{A data frame.}
\item{col_types}{One of \code{NULL}, a \code{\link[=cols]{cols()}} specification, or
a string. See \code{vignette("readr")} for more details.
If \code{NULL}, column types will be imputed using all rows.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
each field before parsing it?}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{guess_integer}{If \code{TRUE}, guess integer types for whole numbers, if
\code{FALSE} guess numeric type for all numbers.}
}
\description{
This is useful if you need to do some manual munging - you can read the
columns in as character, clean it up with (e.g.) regular expressions and
then let readr take another stab at parsing it. The name is a homage to
the base \code{\link[utils:type.convert]{utils::type.convert()}}.
}
\note{
\code{type_convert()} removes a 'spec' attribute,
because it likely modifies the column data types.
(see \code{\link[=spec]{spec()}} for more information about column specifications).
}
\examples{
df <- data.frame(
x = as.character(runif(10)),
y = as.character(sample(10)),
stringsAsFactors = FALSE
)
str(df)
str(type_convert(df))
df <- data.frame(x = c("NA", "10"), stringsAsFactors = FALSE)
str(type_convert(df))
# Type convert can be used to infer types from an entire dataset
# first read the data as character
data <- read_csv(readr_example("mtcars.csv"),
col_types = list(.default = col_character())
)
str(data)
# Then convert it with type_convert
type_convert(data)
}
readr/man/show_progress.Rd 0000644 0001762 0000144 00000001275 14304131171 015315 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{show_progress}
\alias{show_progress}
\title{Determine whether progress bars should be shown}
\usage{
show_progress()
}
\description{
By default, readr shows progress bars. However, progress reporting is
suppressed if any of the following conditions hold:
\itemize{
\item The bar is explicitly disabled by setting
\code{options(readr.show_progress = FALSE)}.
\item The code is run in a non-interactive session, as determined by
\code{\link[rlang:is_interactive]{rlang::is_interactive()}}.
\item The code is run in an RStudio notebook chunk, as determined by
\code{getOption("rstudio.notebook.executing")}.
}
}
readr/man/tokenize.Rd 0000644 0001762 0000144 00000003032 14174704674 014255 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tokenizer.R
\name{tokenize}
\alias{tokenize}
\title{Tokenize a file/string.}
\usage{
tokenize(file, tokenizer = tokenizer_csv(), skip = 0, n_max = -1L)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{tokenizer}{A tokenizer specification.}
\item{skip}{Number of lines to skip before reading data.}
\item{n_max}{Optionally, maximum number of rows to tokenize.}
}
\description{
Turns input into a character vector. Usually the tokenization is done purely
in C++, and never exposed to R (because that requires a copy). This function
is useful for testing, or when a file doesn't parse correctly and you want
to see the underlying tokens.
}
\examples{
tokenize("1,2\n3,4,5\n\n6")
# Only tokenize first two lines
tokenize("1,2\n3,4,5\n\n6", n_max = 2)
}
\keyword{internal}
readr/man/encoding.Rd 0000644 0001762 0000144 00000001627 14174357220 014212 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/encoding.R
\name{guess_encoding}
\alias{guess_encoding}
\title{Guess encoding of file}
\usage{
guess_encoding(file, n_max = 10000, threshold = 0.2)
}
\arguments{
\item{file}{A character string specifying an input as specified in
\code{\link[=datasource]{datasource()}}, a raw vector, or a list of raw vectors.}
\item{n_max}{Number of lines to read. If \code{n_max} is -1, all lines in
file will be read.}
\item{threshold}{Only report guesses above this threshold of certainty.}
}
\value{
A tibble
}
\description{
Uses \code{\link[stringi:stri_enc_detect]{stringi::stri_enc_detect()}}: see the documentation there
for caveats.
}
\examples{
guess_encoding(readr_example("mtcars.csv"))
guess_encoding(read_lines_raw(readr_example("mtcars.csv")))
guess_encoding(read_file_raw(readr_example("mtcars.csv")))
guess_encoding("a\n\u00b5\u00b5")
}
readr/man/melt_table.Rd 0000644 0001762 0000144 00000010042 14371264576 014535 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/melt_table.R
\name{melt_table}
\alias{melt_table}
\alias{melt_table2}
\title{Return melted data for each token in a whitespace-separated file}
\usage{
melt_table(
file,
locale = default_locale(),
na = "NA",
skip = 0,
n_max = Inf,
guess_max = min(n_max, 1000),
progress = show_progress(),
comment = "",
skip_empty_rows = FALSE
)
melt_table2(
file,
locale = default_locale(),
na = "NA",
skip = 0,
n_max = Inf,
progress = show_progress(),
comment = "",
skip_empty_rows = FALSE
)
}
\arguments{
\item{file}{Either a path to a file, a connection, or literal data
(either a single string or a raw vector).
Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will
be automatically uncompressed. Files starting with \verb{http://},
\verb{https://}, \verb{ftp://}, or \verb{ftps://} will be automatically
downloaded. Remote gz files can also be automatically downloaded and
decompressed.
Literal data is most useful for examples and tests. To be recognised as
literal data, the input must be either wrapped with \code{I()}, be a string
containing at least one new line, or be a vector containing at least one
string with a new line.
Using a value of \code{\link[=clipboard]{clipboard()}} will read from the system clipboard.}
\item{locale}{The locale controls defaults that vary from place to place.
The default locale is US-centric (like R), but you can use
\code{\link[=locale]{locale()}} to create your own locale that controls things like
the default time zone, encoding, decimal mark, big mark, and day/month
names.}
\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}
\item{skip}{Number of lines to skip before reading data.}
\item{n_max}{Maximum number of lines to read.}
\item{guess_max}{Maximum number of lines to use for guessing column types.
Will never use more than the number of lines read.
See \code{vignette("column-types", package = "readr")} for more details.}
\item{progress}{Display a progress bar? By default it will only display
in an interactive session and not while knitting a document. The automatic
progress bar can be disabled by setting option \code{readr.show_progress} to
\code{FALSE}.}
\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}
\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
This function has been superseded in readr and moved to \href{https://r-lib.github.io/meltr/}{the meltr package}.
For certain non-rectangular data formats, it can be useful to parse the data
into a melted format where each row represents a single token.
\code{melt_table()} and \code{melt_table2()} are designed to read the type of textual
data where each column is separated by one (or more) columns of space.
\code{melt_table2()} allows any number of whitespace characters between columns,
and the lines can be of different lengths.
\code{melt_table()} is more strict, each line must be the same length,
and each field is in the same position in every line. It first finds empty
columns and then parses like a fixed width file.
}
\examples{
fwf <- readr_example("fwf-sample.txt")
writeLines(read_lines(fwf))
melt_table(fwf)
ws <- readr_example("whitespace-sample.txt")
writeLines(read_lines(ws))
melt_table2(ws)
}
\seealso{
\code{\link[=melt_fwf]{melt_fwf()}} to melt fixed width files where each column
is not separated by whitespace. \code{melt_fwf()} is also useful for reading
tabular data with non-standard formatting. \code{\link[=read_table]{read_table()}} is the
conventional way to read tabular data from whitespace-separated files.
}
readr/man/figures/ 0000755 0001762 0000144 00000000000 14403212750 013564 5 ustar ligges users readr/man/figures/lifecycle-defunct.svg 0000644 0001762 0000144 00000002424 14403212750 017674 0 ustar ligges users
readr/man/figures/lifecycle-maturing.svg 0000644 0001762 0000144 00000002430 14403212750 020067 0 ustar ligges users
readr/man/figures/logo.png 0000644 0001762 0000144 00000037651 14403211600 015237 0 ustar ligges users PNG
IHDR ޫh gAMA a cHRM z&