dtplyr/ 0000755 0001762 0000144 00000000000 14406577055 011605 5 ustar ligges users dtplyr/NAMESPACE 0000644 0001762 0000144 00000010044 14372711230 013006 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(add_count,dtplyr_step)
S3method(anti_join,dtplyr_step)
S3method(arrange,dtplyr_step)
S3method(as.data.frame,dtplyr_step)
S3method(as.data.frame,foo)
S3method(as.data.table,dtplyr_step)
S3method(as_tibble,dtplyr_step)
S3method(auto_copy,dtplyr_step)
S3method(collect,dtplyr_step)
S3method(compute,dtplyr_step)
S3method(count,dtplyr_step)
S3method(dim,dtplyr_step)
S3method(dim,dtplyr_step_first)
S3method(distinct,dtplyr_step)
S3method(do,dtplyr_step)
S3method(dt_call,dtplyr_step)
S3method(dt_call,dtplyr_step_assign)
S3method(dt_call,dtplyr_step_first)
S3method(dt_call,dtplyr_step_join)
S3method(dt_call,dtplyr_step_modify)
S3method(dt_call,dtplyr_step_set)
S3method(dt_call,dtplyr_step_subset)
S3method(dt_has_computation,dtplyr_step)
S3method(dt_has_computation,dtplyr_step_first)
S3method(dt_has_computation,dtplyr_step_group)
S3method(dt_sources,dtplyr_step)
S3method(dt_sources,dtplyr_step_first)
S3method(dt_sources,dtplyr_step_join)
S3method(dt_sources,dtplyr_step_set)
S3method(dt_sources,dtplyr_step_subset)
S3method(full_join,dtplyr_step)
S3method(glimpse,dtplyr_step)
S3method(group_by,dtplyr_step)
S3method(group_map,dtplyr_step)
S3method(group_modify,dtplyr_step)
S3method(group_size,dtplyr_step)
S3method(group_vars,dtplyr_step)
S3method(groups,dtplyr_step)
S3method(head,dtplyr_step)
S3method(inner_join,dtplyr_step)
S3method(left_join,dtplyr_step)
S3method(mutate,dtplyr_step)
S3method(n_groups,dtplyr_step)
S3method(print,dtplyr_step)
S3method(pull,dtplyr_step)
S3method(relocate,dtplyr_step)
S3method(rename,dtplyr_step)
S3method(rename_with,dtplyr_step)
S3method(right_join,dtplyr_step)
S3method(same_src,dtplyr_step)
S3method(sample_frac,dtplyr_step)
S3method(sample_n,dtplyr_step)
S3method(select,dtplyr_step)
S3method(semi_join,dtplyr_step)
S3method(show_query,dtplyr_step)
S3method(slice,dtplyr_step)
S3method(slice_head,dtplyr_step)
S3method(slice_max,dtplyr_step)
S3method(slice_min,dtplyr_step)
S3method(slice_sample,dtplyr_step)
S3method(slice_tail,dtplyr_step)
S3method(summarise,dtplyr_step)
S3method(tail,dtplyr_step)
S3method(tally,dtplyr_step)
S3method(tbl_vars,dtplyr_step)
S3method(tbl_vars,foo)
S3method(tidyselect_data_has_predicates,dtplyr_step)
S3method(tidyselect_data_proxy,dtplyr_step)
S3method(transmute,dtplyr_step)
S3method(ungroup,dtplyr_step)
S3method(union_all,dtplyr_step)
S3method(unique,dtplyr_step)
export(.datatable.aware)
export(lazy_dt)
import(rlang)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,is.data.table)
importFrom(dplyr,add_count)
importFrom(dplyr,anti_join)
importFrom(dplyr,arrange)
importFrom(dplyr,auto_copy)
importFrom(dplyr,collect)
importFrom(dplyr,compute)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
importFrom(dplyr,do)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,glimpse)
importFrom(dplyr,group_by)
importFrom(dplyr,group_map)
importFrom(dplyr,group_modify)
importFrom(dplyr,group_size)
importFrom(dplyr,group_vars)
importFrom(dplyr,groups)
importFrom(dplyr,inner_join)
importFrom(dplyr,intersect)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n_groups)
importFrom(dplyr,pull)
importFrom(dplyr,relocate)
importFrom(dplyr,rename)
importFrom(dplyr,rename_with)
importFrom(dplyr,right_join)
importFrom(dplyr,same_src)
importFrom(dplyr,sample_frac)
importFrom(dplyr,sample_n)
importFrom(dplyr,select)
importFrom(dplyr,semi_join)
importFrom(dplyr,setdiff)
importFrom(dplyr,show_query)
importFrom(dplyr,slice)
importFrom(dplyr,slice_head)
importFrom(dplyr,slice_max)
importFrom(dplyr,slice_min)
importFrom(dplyr,slice_sample)
importFrom(dplyr,slice_tail)
importFrom(dplyr,summarise)
importFrom(dplyr,tally)
importFrom(dplyr,tbl_vars)
importFrom(dplyr,transmute)
importFrom(dplyr,ungroup)
importFrom(dplyr,union)
importFrom(dplyr,union_all)
importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(tibble,as_tibble)
importFrom(tidyselect,everything)
importFrom(tidyselect,tidyselect_data_has_predicates)
importFrom(tidyselect,tidyselect_data_proxy)
importFrom(utils,head)
importFrom(utils,tail)
dtplyr/LICENSE 0000644 0001762 0000144 00000000054 14004642135 012573 0 ustar ligges users YEAR: 2020
COPYRIGHT HOLDER: dtplyr authors
dtplyr/README.md 0000644 0001762 0000144 00000007324 14406336073 013063 0 ustar ligges users
# dtplyr
[CRAN status](https://cran.r-project.org/package=dtplyr)
[R-CMD-check](https://github.com/tidyverse/dtplyr/actions)
[Codecov test coverage](https://app.codecov.io/gh/tidyverse/dtplyr?branch=main)
## Overview
dtplyr provides a [data.table](http://r-datatable.com/) backend for
dplyr. The goal of dtplyr is to allow you to write dplyr code that is
automatically translated to the equivalent, but usually much faster,
data.table code.
See `vignette("translation")` for details of the current translations,
and [table.express](https://github.com/asardaes/table.express) and
[rqdatatable](https://github.com/WinVector/rqdatatable/) for related
work.
## Installation
You can install from CRAN with:
``` r
install.packages("dtplyr")
```
Or try the development version from GitHub with:
``` r
# install.packages("devtools")
devtools::install_github("tidyverse/dtplyr")
```
## Usage
To use dtplyr, you must at least load dtplyr and dplyr. You may also
want to load [data.table](http://r-datatable.com/) so you can access the
other goodies that it provides:
``` r
library(data.table)
library(dtplyr)
library(dplyr, warn.conflicts = FALSE)
```
Then use `lazy_dt()` to create a “lazy” data table that tracks the
operations performed on it.
``` r
mtcars2 <- lazy_dt(mtcars)
```
You can preview the transformation (including the generated data.table
code) by printing the result:
``` r
mtcars2 %>%
filter(wt < 5) %>%
mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
group_by(cyl) %>%
summarise(l100k = mean(l100k))
#> Source: local data table [3 x 2]
#> Call: `_DT1`[wt < 5][, `:=`(l100k = 235.21/mpg)][, .(l100k = mean(l100k)),
#> keyby = .(cyl)]
#>
#> cyl l100k
#> <dbl> <dbl>
#> 1 4 9.05
#> 2 6 12.0
#> 3 8 14.9
#>
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
```
But generally you should reserve this only for debugging, and use
`as.data.table()`, `as.data.frame()`, or `as_tibble()` to indicate that
you’re done with the transformation and want to access the results:
``` r
mtcars2 %>%
filter(wt < 5) %>%
mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
group_by(cyl) %>%
summarise(l100k = mean(l100k)) %>%
as_tibble()
#> # A tibble: 3 × 2
#> cyl l100k
#> <dbl> <dbl>
#> 1 4 9.05
#> 2 6 12.0
#> 3 8 14.9
```
## Why is dtplyr slower than data.table?
There are two primary reasons that dtplyr will always be somewhat slower
than data.table:
- Each dplyr verb must do some work to convert dplyr syntax to
data.table syntax. This takes time proportional to the complexity of
the input code, not the input *data*, so should be a negligible
overhead for large datasets. [Initial
benchmarks](https://dtplyr.tidyverse.org/articles/translation.html#performance)
suggest that the overhead should be under 1ms per dplyr call.
- To match dplyr semantics, `mutate()` does not modify in place by
default. This means that most expressions involving `mutate()` must
make a copy that would not be necessary if you were using data.table
directly. (You can opt out of this behaviour in `lazy_dt()` with
`immutable = FALSE`).
## Code of Conduct
Please note that the dtplyr project is released with a [Contributor Code
of Conduct](https://dtplyr.tidyverse.org/CODE_OF_CONDUCT.html). By
contributing to this project, you agree to abide by its terms.
dtplyr/man/ 0000755 0001762 0000144 00000000000 14406335651 012352 5 ustar ligges users dtplyr/man/mutate.dtplyr_step.Rd 0000644 0001762 0000144 00000006052 14372711230 016504 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-mutate.R
\name{mutate.dtplyr_step}
\alias{mutate.dtplyr_step}
\title{Create and modify columns}
\usage{
\method{mutate}{dtplyr_step}(
.data,
...,
.by = NULL,
.keep = c("all", "used", "unused", "none"),
.before = NULL,
.after = NULL
)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Name-value pairs.
The name gives the name of the column in the output.
The value can be:
\itemize{
\item A vector of length 1, which will be recycled to the correct length.
\item A vector the same length as the current group (or the whole data frame
if ungrouped).
\item \code{NULL}, to remove the column.
\item A data frame or tibble, to create multiple columns in the output.
}}
\item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
\item{.keep}{Control which columns from \code{.data} are retained in the output. Grouping
columns and columns created by \code{...} are always kept.
\itemize{
\item \code{"all"} retains all columns from \code{.data}. This is the default.
\item \code{"used"} retains only the columns used in \code{...} to create new
columns. This is useful for checking your work, as it displays inputs
and outputs side-by-side.
\item \code{"unused"} retains only the columns \emph{not} used in \code{...} to create new
columns. This is useful if you generate new columns, but no longer need
the columns used to generate them.
\item \code{"none"} doesn't retain any extra columns from \code{.data}. Only the grouping
variables and columns created by \code{...} are kept.
}
Note: With dtplyr \code{.keep} will only work with column names passed as symbols, and won't
work with other workflows (e.g. \code{eval(parse(text = "x + 1"))})}
\item{.before, .after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns
should appear (the default is to add to the right hand side). See
\code{\link[dplyr:relocate]{relocate()}} for more details.}
}
\description{
This is a method for the dplyr \code{\link[=mutate]{mutate()}} generic. It is translated to
the \code{j} argument of \verb{[.data.table}, using \verb{:=} to modify "in place". If
\code{.before} or \code{.after} is provided, the new columns are relocated with a call
to \code{\link[data.table:setcolorder]{data.table::setcolorder()}}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(data.frame(x = 1:5, y = 5:1))
dt \%>\%
mutate(a = (x + y) / 2, b = sqrt(x^2 + y^2))
# It uses a more sophisticated translation when newly created variables
# are used in the same expression
dt \%>\%
mutate(x1 = x + 1, x2 = x1 + 1)
}
dtplyr/man/expand.dtplyr_step.Rd 0000644 0001762 0000144 00000006151 14150760302 016462 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-subset-expand.R
\name{expand.dtplyr_step}
\alias{expand.dtplyr_step}
\title{Expand data frame to include all possible combinations of values.}
\usage{
\method{expand}{dtplyr_step}(data, ..., .name_repair = "check_unique")
}
\arguments{
\item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{Specification of columns to expand. Columns can be atomic vectors
or lists.
\itemize{
\item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not
present in the data, supply each variable as a separate argument:
\code{expand(df, x, y, z)}.
\item To find only the combinations that occur in the
data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}.
\item You can combine the two forms. For example,
\code{expand(df, nesting(school_id, student_id), date)} would produce
a row for each present school-student combination for all possible
dates.
}
Unlike the data.frame method, this method does not use the full set of
levels, just those that appear in the data.
When used with continuous variables, you may need to fill in values
that do not appear in the data: to do so use expressions like
\code{year = 2010:2020} or \code{year = full_seq(year,1)}.}
\item{.name_repair}{Treatment of problematic column names:
\itemize{
\item \code{"minimal"}: No name repair or checks, beyond basic existence,
\item \code{"unique"}: Make sure names are unique and not empty,
\item \code{"check_unique"}: (default value), no name repair, but check they are
\code{unique},
\item \code{"universal"}: Make the names \code{unique} and syntactic
\item a function: apply custom name repair (e.g., \code{.name_repair = make.names}
for names in the style of base R).
\item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}
}
This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
See there for more details on these terms and the strategies used
to enforce them.}
}
\description{
This is a method for the tidyr \code{expand()} generic. It is translated to
\code{\link[data.table:J]{data.table::CJ()}}.
}
\examples{
library(tidyr)
fruits <- lazy_dt(tibble(
type = c("apple", "orange", "apple", "orange", "orange", "orange"),
year = c(2010, 2010, 2012, 2010, 2010, 2012),
size = factor(
c("XS", "S", "M", "S", "S", "M"),
levels = c("XS", "S", "M", "L")
),
weights = rnorm(6, as.numeric(size) + 2)
))
# All possible combinations ---------------------------------------
# Note that only present levels of the factor variable `size` are retained.
fruits \%>\% expand(type)
fruits \%>\% expand(type, size)
# This is different from the data frame behaviour:
fruits \%>\% dplyr::collect() \%>\% expand(type, size)
# Other uses -------------------------------------------------------
fruits \%>\% expand(type, size, 2010:2012)
# Use `anti_join()` to determine which observations are missing
all <- fruits \%>\% expand(type, size, year)
all
all \%>\% dplyr::anti_join(fruits)
# Use with `right_join()` to fill in missing rows
fruits \%>\% dplyr::right_join(all)
}
dtplyr/man/dot-datatable.aware.Rd 0000644 0001762 0000144 00000000525 14375676067 016464 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dtplyr-package.R
\docType{data}
\name{.datatable.aware}
\alias{.datatable.aware}
\title{dtplyr is data.table aware}
\format{
An object of class \code{logical} of length 1.
}
\usage{
.datatable.aware
}
\description{
dtplyr is data.table aware
}
\keyword{internal}
dtplyr/man/rename.dtplyr_step.Rd 0000644 0001762 0000144 00000002352 14006775461 016465 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-call.R
\name{rename.dtplyr_step}
\alias{rename.dtplyr_step}
\alias{rename_with.dtplyr_step}
\title{Rename columns using their names}
\usage{
\method{rename}{dtplyr_step}(.data, ...)
\method{rename_with}{dtplyr_step}(.data, .fn, .cols = everything(), ...)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
\item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use
\code{new_name = old_name} to rename selected variables.
For \code{rename_with()}: additional arguments passed onto \code{.fn}.}
\item{.fn}{A function used to transform the selected \code{.cols}. Should
return a character vector the same length as the input.}
\item{.cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to rename;
defaults to all columns.}
}
\description{
These are methods for the dplyr generics \code{\link[=rename]{rename()}} and \code{\link[=rename_with]{rename_with()}}.
They are both translated to \code{\link[data.table:setattr]{data.table::setnames()}}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
dt \%>\% rename(new_x = x, new_y = y)
dt \%>\% rename_with(toupper)
}
dtplyr/man/filter.dtplyr_step.Rd 0000644 0001762 0000144 00000002710 14372711230 016467 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-subset-filter.R
\name{filter.dtplyr_step}
\alias{filter.dtplyr_step}
\title{Subset rows using column values}
\usage{
\method{filter}{dtplyr_step}(.data, ..., .by = NULL, .preserve = FALSE)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Expressions that return a
logical value, and are defined in terms of the variables in \code{.data}.
If multiple expressions are included, they are combined with the \code{&} operator.
Only rows for which all conditions evaluate to \code{TRUE} are kept.}
\item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
\item{.preserve}{Ignored}
}
\description{
This is a method for the dplyr \code{\link[dplyr:filter]{filter()}} generic. It is translated to
the \code{i} argument of \verb{[.data.table}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(mtcars)
dt \%>\% filter(cyl == 4)
dt \%>\% filter(vs, am)
dt \%>\%
group_by(cyl) \%>\%
filter(mpg > mean(mpg))
}
dtplyr/man/drop_na.dtplyr_step.Rd 0000644 0001762 0000144 00000001361 14300152547 016626 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-call.R
\name{drop_na.dtplyr_step}
\alias{drop_na.dtplyr_step}
\title{Drop rows containing missing values}
\usage{
\method{drop_na}{dtplyr_step}(data, ...)
}
\arguments{
\item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to inspect for
missing values. If empty, all columns are used.}
}
\description{
This is a method for the tidyr \code{drop_na()} generic. It is translated to
\code{data.table::na.omit()}.
}
\examples{
library(dplyr)
library(tidyr)
dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b")))
dt \%>\% drop_na()
dt \%>\% drop_na(x)
vars <- "y"
dt \%>\% drop_na(x, any_of(vars))
}
dtplyr/man/summarise.dtplyr_step.Rd 0000644 0001762 0000144 00000005545 14372711230 017220 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-subset-summarise.R
\name{summarise.dtplyr_step}
\alias{summarise.dtplyr_step}
\title{Summarise each group to one row}
\usage{
\method{summarise}{dtplyr_step}(.data, ..., .by = NULL, .groups = NULL)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Name-value pairs of summary
functions. The name will be the name of the variable in the result.
The value can be:
\itemize{
\item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
\item A data frame, to add multiple columns from a single expression.
}
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was
deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.}
\item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
\item{.groups}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Grouping structure of the
result.
\itemize{
\item "drop_last": dropping the last level of grouping. This was the
only supported option before version 1.0.0.
\item "drop": All levels of grouping are dropped.
\item "keep": Same grouping structure as \code{.data}.
\item "rowwise": Each row is its own group.
}
When \code{.groups} is not specified, it is chosen
based on the number of rows of the results:
\itemize{
\item If all the results have 1 row, you get "drop_last".
\item If the number of rows varies, you get "keep" (note that returning a
variable number of rows was deprecated in favor of \code{\link[dplyr:reframe]{reframe()}}, which
also unconditionally drops all levels of grouping).
}
In addition, a message informs you of that choice, unless the result is ungrouped,
the option "dplyr.summarise.inform" is set to \code{FALSE},
or when \code{summarise()} is called from a function in a package.}
}
\description{
This is a method for the dplyr \code{\link[=summarise]{summarise()}} generic. It is translated to
the \code{j} argument of \verb{[.data.table}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(mtcars)
dt \%>\%
group_by(cyl) \%>\%
summarise(vs = mean(vs))
dt \%>\%
group_by(cyl) \%>\%
summarise(across(disp:wt, mean))
}
dtplyr/man/lazy_dt.Rd 0000644 0001762 0000144 00000005473 14007000430 014275 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-first.R
\name{lazy_dt}
\alias{lazy_dt}
\alias{tbl_dt}
\alias{grouped_dt}
\title{Create a "lazy" data.table for use with dplyr verbs}
\usage{
lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL)
}
\arguments{
\item{x}{A data table (or something that can be coerced to a data table).}
\item{name}{Optionally, supply a name to be used in generated expressions.
For expert use only.}
\item{immutable}{If \code{TRUE}, \code{x} is treated as immutable and will never
be modified by any code generated by dtplyr. Alternatively, you can set
\code{immutable = FALSE} to allow dtplyr to modify the input object.}
\item{key_by}{Set keys for data frame, using \code{\link[=select]{select()}} semantics (e.g.
\code{key_by = c(key1, key2)}).
This uses \code{\link[data.table:setkey]{data.table::setkey()}} to sort the table and build an index.
This will considerably improve performance for subsets, summaries, and
joins that use the keys.
See \code{vignette("datatable-keys-fast-subset")} for more details.}
}
\description{
A lazy data.table lazily captures the intent of dplyr verbs, only actually
performing computation when requested (with \code{\link[=collect]{collect()}}, \code{\link[=pull]{pull()}},
\code{\link[=as.data.frame]{as.data.frame()}}, \code{\link[data.table:as.data.table]{data.table::as.data.table()}}, or \code{\link[tibble:as_tibble]{tibble::as_tibble()}}).
This allows dtplyr to convert dplyr verbs into as few data.table expressions
as possible, which leads to a high performance translation.
See \code{vignette("translation")} for the details of the translation.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
# If you have a data.table, using it with any dplyr generic will
# automatically convert it to a lazy_dt object
dt <- data.table::data.table(x = 1:10, y = 10:1)
dt \%>\% filter(x == y)
dt \%>\% mutate(z = x + y)
# Note that dtplyr will avoid mutating the input data.table, so the
# previous translation includes an automatic copy(). You can avoid this
# with a manual call to lazy_dt()
dt \%>\%
lazy_dt(immutable = FALSE) \%>\%
mutate(z = x + y)
# If you have a data frame, you can use lazy_dt() to convert it to
# a data.table:
mtcars2 <- lazy_dt(mtcars)
mtcars2
mtcars2 \%>\% select(mpg:cyl)
mtcars2 \%>\% select(x = mpg, y = cyl)
mtcars2 \%>\% filter(cyl == 4) \%>\% select(mpg)
mtcars2 \%>\% select(mpg, cyl) \%>\% filter(cyl == 4)
mtcars2 \%>\% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
mtcars2 \%>\% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
mtcars2 \%>\% filter(cyl == 8) \%>\% mutate(cyl2 = cyl * 2)
# Learn more about translation in vignette("translation")
by_cyl <- mtcars2 \%>\% group_by(cyl)
by_cyl \%>\% summarise(mpg = mean(mpg))
by_cyl \%>\% mutate(mpg = mean(mpg))
by_cyl \%>\%
filter(mpg < mean(mpg)) \%>\%
summarise(hp = mean(hp))
}
dtplyr/man/intersect.dtplyr_step.Rd 0000644 0001762 0000144 00000002067 14006775461 017221 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-set.R
\name{intersect.dtplyr_step}
\alias{intersect.dtplyr_step}
\alias{union.dtplyr_step}
\alias{union_all.dtplyr_step}
\alias{setdiff.dtplyr_step}
\title{Set operations}
\usage{
\method{intersect}{dtplyr_step}(x, y, ...)
\method{union}{dtplyr_step}(x, y, ...)
\method{union_all}{dtplyr_step}(x, y, ...)
\method{setdiff}{dtplyr_step}(x, y, ...)
}
\arguments{
\item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.}
\item{...}{Ignored}
}
\description{
These are methods for the dplyr generics \code{\link[=intersect]{intersect()}}, \code{\link[=union]{union()}},
\code{\link[=union_all]{union_all()}}, and \code{\link[=setdiff]{setdiff()}}. They are translated to
\code{\link[data.table:setops]{data.table::fintersect()}}, \code{\link[data.table:setops]{data.table::funion()}}, and
\code{\link[data.table:setops]{data.table::fsetdiff()}}.
}
\examples{
dt1 <- lazy_dt(data.frame(x = 1:4))
dt2 <- lazy_dt(data.frame(x = c(2, 4, 6)))
intersect(dt1, dt2)
union(dt1, dt2)
setdiff(dt1, dt2)
}
dtplyr/man/group_modify.dtplyr_step.Rd 0000644 0001762 0000144 00000002555 14406335651 017723 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-modify.R
\name{group_modify.dtplyr_step}
\alias{group_modify.dtplyr_step}
\alias{group_map.dtplyr_step}
\title{Apply a function to each group}
\usage{
\method{group_modify}{dtplyr_step}(.data, .f, ..., keep = FALSE)
\method{group_map}{dtplyr_step}(.data, .f, ..., keep = FALSE)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
\item{.f}{The name of a two argument function. The first argument is passed
\code{.SD}, the data.table representing the current group; the second argument
is passed \code{.BY}, a list giving the current values of the grouping
variables. The function should return a list or data.table.}
\item{...}{Additional arguments passed to \code{.f}}
\item{keep}{Not supported for \link{lazy_dt}.}
}
\value{
\code{group_map()} applies \code{.f} to each group, returning a list.
\code{group_modify()} replaces each group with the results of \code{.f}, returning a
modified \code{\link[=lazy_dt]{lazy_dt()}}.
}
\description{
These are methods for the dplyr \code{\link[=group_map]{group_map()}} and \code{\link[=group_modify]{group_modify()}} generics.
They are both translated to \verb{[.data.table}.
}
\examples{
library(dplyr)
dt <- lazy_dt(mtcars)
dt \%>\%
group_by(cyl) \%>\%
group_modify(head, n = 2L)
dt \%>\%
group_by(cyl) \%>\%
group_map(head, n = 2L)
}
dtplyr/man/relocate.dtplyr_step.Rd 0000644 0001762 0000144 00000001745 14372711230 017007 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-colorder-relocate.R
\name{relocate.dtplyr_step}
\alias{relocate.dtplyr_step}
\title{Relocate variables using their names}
\usage{
\method{relocate}{dtplyr_step}(.data, ..., .before = NULL, .after = NULL)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to move.}
\item{.before, .after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Destination of
columns selected by \code{...}. Supplying neither will move columns to the
left-hand side; specifying both is an error.}
}
\description{
This is a method for the dplyr \code{\link[=relocate]{relocate()}} generic. It is translated to
the \code{j} argument of \verb{[.data.table}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
dt \%>\% relocate(z)
dt \%>\% relocate(y, .before = x)
dt \%>\% relocate(y, .after = y)
}
dtplyr/man/distinct.dtplyr_step.Rd 0000644 0001762 0000144 00000002230 14372711230 017020 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-call.R
\name{distinct.dtplyr_step}
\alias{distinct.dtplyr_step}
\title{Subset distinct/unique rows}
\usage{
\method{distinct}{dtplyr_step}(.data, ..., .keep_all = FALSE)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Optional variables to use
when determining uniqueness. If there are multiple rows for a given
combination of inputs, only the first row will be preserved. If omitted,
will use all variables in the data frame.}
\item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}.
If a combination of \code{...} is not distinct, this keeps the
first row of values.}
}
\description{
This is a method for the dplyr \code{\link[=distinct]{distinct()}} generic. It is translated to
\code{\link[data.table:duplicated]{data.table::unique.data.table()}}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
df <- lazy_dt(data.frame(
x = sample(10, 100, replace = TRUE),
y = sample(10, 100, replace = TRUE)
))
df \%>\% distinct(x)
df \%>\% distinct(x, y)
df \%>\% distinct(x, .keep_all = TRUE)
}
dtplyr/man/left_join.dtplyr_step.Rd 0000644 0001762 0000144 00000007120 14372711230 017153 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-join.R
\name{left_join.dtplyr_step}
\alias{left_join.dtplyr_step}
\title{Join data tables}
\usage{
\method{left_join}{dtplyr_step}(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y"))
}
\arguments{
\item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.}
\item{...}{Other parameters passed onto methods.}
\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
vector of variables to join by.
If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so
that you can check they're correct; suppress the message by supplying \code{by}
explicitly.
To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
multiple expressions. For example, \code{join_by(a == b, c == d)} will match
\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
\code{x} and \code{y}, you can shorten this by listing only the variable names, like
\code{join_by(a, c)}.
\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
these types of joins.
For simple equality joins, you can alternatively specify a character vector
of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
\code{\link[dplyr:cross_join]{cross_join()}}.}
\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}. This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}
\item{suffix}{If there are non-joined duplicate variables in \code{x} and
\code{y}, these suffixes will be added to the output to disambiguate them.
Should be a character vector of length 2.}
}
\description{
These are methods for the dplyr generics \code{\link[=left_join]{left_join()}}, \code{\link[=right_join]{right_join()}},
\code{\link[=inner_join]{inner_join()}}, \code{\link[=full_join]{full_join()}}, \code{\link[=anti_join]{anti_join()}}, and \code{\link[=semi_join]{semi_join()}}. Left, right,
inner, and anti join are translated to the \verb{[.data.table} equivalent,
full joins to \code{\link[data.table:merge]{data.table::merge.data.table()}}.
Left, right, and full joins are in some cases followed by calls to
\code{\link[data.table:setcolorder]{data.table::setcolorder()}} and \code{\link[data.table:setattr]{data.table::setnames()}} to ensure that column
order and names match dplyr conventions.
Semi-joins don't have a direct data.table equivalent.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
band_dt <- lazy_dt(dplyr::band_members)
instrument_dt <- lazy_dt(dplyr::band_instruments)
band_dt \%>\% left_join(instrument_dt)
band_dt \%>\% right_join(instrument_dt)
band_dt \%>\% inner_join(instrument_dt)
band_dt \%>\% full_join(instrument_dt)
band_dt \%>\% semi_join(instrument_dt)
band_dt \%>\% anti_join(instrument_dt)
}
dtplyr/man/figures/ 0000755 0001762 0000144 00000000000 14004642135 014006 5 ustar ligges users dtplyr/man/figures/logo.png 0000644 0001762 0000144 00000136617 14004642135 015472 0 ustar ligges users PNG
IHDR ޫh gAMA a cHRM z&