dtplyr/ 0000755 0001762 0000144 00000000000 14172110611 011562 5 ustar ligges users dtplyr/NAMESPACE 0000644 0001762 0000144 00000011216 14172100676 013014 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(anti_join,data.table)
S3method(anti_join,dtplyr_step)
S3method(arrange,data.table)
S3method(arrange,dtplyr_step)
S3method(as.data.frame,dtplyr_step)
S3method(as.data.frame,foo)
S3method(as.data.table,dtplyr_step)
S3method(as_tibble,dtplyr_step)
S3method(auto_copy,dtplyr_step)
S3method(collect,dtplyr_step)
S3method(compute,dtplyr_step)
S3method(count,data.table)
S3method(count,dtplyr_step)
S3method(dim,dtplyr_step)
S3method(dim,dtplyr_step_first)
S3method(distinct,data.table)
S3method(distinct,dtplyr_step)
S3method(do,data.table)
S3method(do,dtplyr_step)
S3method(dt_call,dtplyr_step)
S3method(dt_call,dtplyr_step_assign)
S3method(dt_call,dtplyr_step_first)
S3method(dt_call,dtplyr_step_join)
S3method(dt_call,dtplyr_step_modify)
S3method(dt_call,dtplyr_step_set)
S3method(dt_call,dtplyr_step_subset)
S3method(dt_has_computation,dtplyr_step)
S3method(dt_has_computation,dtplyr_step_first)
S3method(dt_has_computation,dtplyr_step_group)
S3method(dt_sources,dtplyr_step)
S3method(dt_sources,dtplyr_step_first)
S3method(dt_sources,dtplyr_step_join)
S3method(dt_sources,dtplyr_step_set)
S3method(dt_sources,dtplyr_step_subset)
S3method(full_join,data.table)
S3method(full_join,dtplyr_step)
S3method(glimpse,dtplyr_step)
S3method(group_by,data.table)
S3method(group_by,dtplyr_step)
S3method(group_map,dtplyr_step)
S3method(group_modify,dtplyr_step)
S3method(group_size,dtplyr_step)
S3method(group_vars,dtplyr_step)
S3method(groups,dtplyr_step)
S3method(head,dtplyr_step)
S3method(inner_join,data.table)
S3method(inner_join,dtplyr_step)
S3method(left_join,data.table)
S3method(left_join,dtplyr_step)
S3method(mutate,data.table)
S3method(mutate,dtplyr_step)
S3method(n_groups,dtplyr_step)
S3method(print,dtplyr_step)
S3method(pull,dtplyr_step)
S3method(relocate,data.table)
S3method(relocate,dtplyr_step)
S3method(rename,data.table)
S3method(rename,dtplyr_step)
S3method(rename_with,data.table)
S3method(rename_with,dtplyr_step)
S3method(right_join,data.table)
S3method(right_join,dtplyr_step)
S3method(same_src,dtplyr_step)
S3method(sample_frac,data.table)
S3method(sample_frac,dtplyr_step)
S3method(sample_n,data.table)
S3method(sample_n,dtplyr_step)
S3method(select,data.table)
S3method(select,dtplyr_step)
S3method(semi_join,data.table)
S3method(semi_join,dtplyr_step)
S3method(show_query,dtplyr_step)
S3method(slice,data.table)
S3method(slice,dtplyr_step)
S3method(slice_head,data.table)
S3method(slice_head,dtplyr_step)
S3method(slice_max,data.table)
S3method(slice_max,dtplyr_step)
S3method(slice_min,data.table)
S3method(slice_min,dtplyr_step)
S3method(slice_sample,data.table)
S3method(slice_sample,dtplyr_step)
S3method(slice_tail,data.table)
S3method(slice_tail,dtplyr_step)
S3method(summarise,data.table)
S3method(summarise,dtplyr_step)
S3method(tail,dtplyr_step)
S3method(tally,data.table)
S3method(tally,dtplyr_step)
S3method(tbl_vars,dtplyr_step)
S3method(tbl_vars,foo)
S3method(transmute,data.table)
S3method(transmute,dtplyr_step)
S3method(ungroup,data.table)
S3method(ungroup,dtplyr_step)
S3method(union_all,data.table)
S3method(union_all,dtplyr_step)
S3method(unique,dtplyr_step)
export(.datatable.aware)
export(lazy_dt)
import(rlang)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,is.data.table)
importFrom(dplyr,anti_join)
importFrom(dplyr,arrange)
importFrom(dplyr,auto_copy)
importFrom(dplyr,collect)
importFrom(dplyr,compute)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
importFrom(dplyr,do)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,glimpse)
importFrom(dplyr,group_by)
importFrom(dplyr,group_map)
importFrom(dplyr,group_modify)
importFrom(dplyr,group_size)
importFrom(dplyr,group_vars)
importFrom(dplyr,groups)
importFrom(dplyr,inner_join)
importFrom(dplyr,intersect)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n_groups)
importFrom(dplyr,pull)
importFrom(dplyr,relocate)
importFrom(dplyr,rename)
importFrom(dplyr,rename_with)
importFrom(dplyr,right_join)
importFrom(dplyr,same_src)
importFrom(dplyr,sample_frac)
importFrom(dplyr,sample_n)
importFrom(dplyr,select)
importFrom(dplyr,semi_join)
importFrom(dplyr,setdiff)
importFrom(dplyr,show_query)
importFrom(dplyr,slice)
importFrom(dplyr,slice_head)
importFrom(dplyr,slice_max)
importFrom(dplyr,slice_min)
importFrom(dplyr,slice_sample)
importFrom(dplyr,slice_tail)
importFrom(dplyr,summarise)
importFrom(dplyr,tally)
importFrom(dplyr,tbl_vars)
importFrom(dplyr,transmute)
importFrom(dplyr,ungroup)
importFrom(dplyr,union)
importFrom(dplyr,union_all)
importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(tibble,as_tibble)
importFrom(tidyselect,everything)
importFrom(utils,head)
importFrom(utils,tail)
dtplyr/LICENSE 0000644 0001762 0000144 00000000054 14004642135 012573 0 ustar ligges users YEAR: 2020
COPYRIGHT HOLDER: dtplyr authors
dtplyr/README.md 0000644 0001762 0000144 00000011100 14152441602 013037 0 ustar ligges users
# dtplyr
[CRAN status](https://cran.r-project.org/package=dtplyr)
[R-CMD-check](https://github.com/tidyverse/dtplyr/actions)
[Codecov test coverage](https://app.codecov.io/gh/tidyverse/dtplyr?branch=main)
## Overview
dtplyr provides a [data.table](http://r-datatable.com/) backend for
dplyr. The goal of dtplyr is to allow you to write dplyr code that is
automatically translated to the equivalent, but usually much faster,
data.table code.
Compared to the previous release, this version of dtplyr is a complete
rewrite that focusses only on lazy evaluation triggered by use of
`lazy_dt()`. This means that no computation is performed until you
explicitly request it with `as.data.table()`, `as.data.frame()` or
`as_tibble()`. This has a considerable advantage over the previous
version (which eagerly evaluated each step) because it allows dtplyr to
generate significantly more performant translations. This is a large
change that breaks all existing uses of dtplyr. But frankly, dtplyr was
pretty useless before because it did such a bad job of generating
data.table code. Fortunately few people used it, so a major overhaul was
possible.
See `vignette("translation")` for details of the current translations,
and [table.express](https://github.com/asardaes/table.express) and
[rqdatatable](https://github.com/WinVector/rqdatatable/) for related
work.
## Installation
You can install from CRAN with:
``` r
install.packages("dtplyr")
```
Or try the development version from GitHub with:
``` r
# install.packages("devtools")
devtools::install_github("tidyverse/dtplyr")
```
## Usage
To use dtplyr, you must at least load dtplyr and dplyr. You may also
want to load [data.table](http://r-datatable.com/) so you can access the
other goodies that it provides:
``` r
library(data.table)
library(dtplyr)
library(dplyr, warn.conflicts = FALSE)
```
Then use `lazy_dt()` to create a “lazy” data table that tracks the
operations performed on it.
``` r
mtcars2 <- lazy_dt(mtcars)
```
You can preview the transformation (including the generated data.table
code) by printing the result:
``` r
mtcars2 %>%
filter(wt < 5) %>%
mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
group_by(cyl) %>%
summarise(l100k = mean(l100k))
#> Source: local data table [3 x 2]
#> Call: `_DT1`[wt < 5][, `:=`(l100k = 235.21/mpg)][, .(l100k = mean(l100k)),
#> keyby = .(cyl)]
#>
#> cyl l100k
#>
#> 1 4 9.05
#> 2 6 12.0
#> 3 8 14.9
#>
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
```
But generally you should reserve this only for debugging, and use
`as.data.table()`, `as.data.frame()`, or `as_tibble()` to indicate that
you’re done with the transformation and want to access the results:
``` r
mtcars2 %>%
filter(wt < 5) %>%
mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
group_by(cyl) %>%
summarise(l100k = mean(l100k)) %>%
as_tibble()
#> # A tibble: 3 × 2
#> cyl l100k
#>
#> 1 4 9.05
#> 2 6 12.0
#> 3 8 14.9
```
## Why is dtplyr slower than data.table?
There are three primary reasons that dtplyr will always be somewhat
slower than data.table:
- Each dplyr verb must do some work to convert dplyr syntax to
data.table syntax. This takes time proportional to the complexity of
the input code, not the input *data*, so should be a negligible
overhead for large datasets. [Initial
benchmarks](https://dtplyr.tidyverse.org/articles/translation.html#performance)
suggest that the overhead should be under 1ms per dplyr call.
- Some data.table expressions have no direct dplyr equivalent. For
example, there’s no way to express cross- or rolling-joins with
dplyr.
- To match dplyr semantics, `mutate()` does not modify in place by
default. This means that most expressions involving `mutate()` must
make a copy that would not be necessary if you were using data.table
directly. (You can opt out of this behaviour in `lazy_dt()` with
`immutable = FALSE`).
## Code of Conduct
Please note that the dtplyr project is released with a [Contributor Code
of Conduct](https://dtplyr.tidyverse.org/CODE_OF_CONDUCT.html). By
contributing to this project, you agree to abide by its terms.
dtplyr/man/ 0000755 0001762 0000144 00000000000 14172101033 012333 5 ustar ligges users dtplyr/man/mutate.dtplyr_step.Rd 0000644 0001762 0000144 00000002636 14126601265 016513 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-mutate.R
\name{mutate.dtplyr_step}
\alias{mutate.dtplyr_step}
\title{Create and modify columns}
\usage{
\method{mutate}{dtplyr_step}(.data, ..., .before = NULL, .after = NULL)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\link[dplyr:dplyr_data_masking]{data-masking}> Name-value pairs.
The name gives the name of the column in the output, and the value should
evaluate to a vector.}
\item{.before, .after}{\Sexpr[results=rd]{lifecycle::badge("experimental")}
<\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns
should appear (the default is to add to the right hand side). See
\code{\link[=relocate]{relocate()}} for more details.}
}
\description{
This is a method for the dplyr \code{\link[=mutate]{mutate()}} generic. It is translated to
the \code{j} argument of \verb{[.data.table}, using \verb{:=} to modify "in place". If
\code{.before} or \code{.after} is provided, the new columns are relocated with a call
to \code{\link[data.table:setcolorder]{data.table::setcolorder()}}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(data.frame(x = 1:5, y = 5:1))
dt \%>\%
mutate(a = (x + y) / 2, b = sqrt(x^2 + y^2))
# It uses a more sophisticated translation when newly created variables
# are used in the same expression
dt \%>\%
mutate(x1 = x + 1, x2 = x1 + 1)
}
dtplyr/man/expand.dtplyr_step.Rd 0000644 0001762 0000144 00000006151 14150760302 016462 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-subset-expand.R
\name{expand.dtplyr_step}
\alias{expand.dtplyr_step}
\title{Expand data frame to include all possible combinations of values.}
\usage{
\method{expand}{dtplyr_step}(data, ..., .name_repair = "check_unique")
}
\arguments{
\item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{Specification of columns to expand. Columns can be atomic vectors
or lists.
\itemize{
\item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not
present in the data, supply each variable as a separate argument:
\code{expand(df, x, y, z)}.
\item To find only the combinations that occur in the
data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}.
\item You can combine the two forms. For example,
\code{expand(df, nesting(school_id, student_id), date)} would produce
a row for each present school-student combination for all possible
dates.
}
Unlike the data.frame method, this method does not use the full set of
levels, just those that appear in the data.
When used with continuous variables, you may need to fill in values
that do not appear in the data: to do so use expressions like
\code{year = 2010:2020} or \code{year = full_seq(year,1)}.}
\item{.name_repair}{Treatment of problematic column names:
\itemize{
\item \code{"minimal"}: No name repair or checks, beyond basic existence,
\item \code{"unique"}: Make sure names are unique and not empty,
\item \code{"check_unique"}: (default value), no name repair, but check they are
\code{unique},
\item \code{"universal"}: Make the names \code{unique} and syntactic
\item a function: apply custom name repair (e.g., \code{.name_repair = make.names}
for names in the style of base R).
\item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}
}
This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
See there for more details on these terms and the strategies used
to enforce them.}
}
\description{
This is a method for the tidyr \code{expand()} generic. It is translated to
\code{\link[data.table:J]{data.table::CJ()}}.
}
\examples{
library(tidyr)
fruits <- lazy_dt(tibble(
type = c("apple", "orange", "apple", "orange", "orange", "orange"),
year = c(2010, 2010, 2012, 2010, 2010, 2012),
size = factor(
c("XS", "S", "M", "S", "S", "M"),
levels = c("XS", "S", "M", "L")
),
weights = rnorm(6, as.numeric(size) + 2)
))
# All possible combinations ---------------------------------------
# Note that only present levels of the factor variable `size` are retained.
fruits \%>\% expand(type)
fruits \%>\% expand(type, size)
# This is different from the data frame behaviour:
fruits \%>\% dplyr::collect() \%>\% expand(type, size)
# Other uses -------------------------------------------------------
fruits \%>\% expand(type, size, 2010:2012)
# Use `anti_join()` to determine which observations are missing
all <- fruits \%>\% expand(type, size, year)
all
all \%>\% dplyr::anti_join(fruits)
# Use with `right_join()` to fill in missing rows
fruits \%>\% dplyr::right_join(all)
}
dtplyr/man/rename.dtplyr_step.Rd 0000644 0001762 0000144 00000002352 14006775461 016465 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-call.R
\name{rename.dtplyr_step}
\alias{rename.dtplyr_step}
\alias{rename_with.dtplyr_step}
\title{Rename columns using their names}
\usage{
\method{rename}{dtplyr_step}(.data, ...)
\method{rename_with}{dtplyr_step}(.data, .fn, .cols = everything(), ...)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
\item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use
\code{new_name = old_name} to rename selected variables.
For \code{rename_with()}: additional arguments passed onto \code{.fn}.}
\item{.fn}{A function used to transform the selected \code{.cols}. Should
return a character vector the same length as the input.}
\item{.cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to rename;
defaults to all columns.}
}
\description{
These are methods for the dplyr generics \code{\link[=rename]{rename()}} and \code{\link[=rename_with]{rename_with()}}.
They are both translated to \code{\link[data.table:setattr]{data.table::setnames()}}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
dt \%>\% rename(new_x = x, new_y = y)
dt \%>\% rename_with(toupper)
}
dtplyr/man/filter.dtplyr_step.Rd 0000644 0001762 0000144 00000001765 14006777613 016513 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-subset-filter.R
\name{filter.dtplyr_step}
\alias{filter.dtplyr_step}
\title{Subset rows using column values}
\usage{
\method{filter}{dtplyr_step}(.data, ..., .preserve = FALSE)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Expressions that return a
logical value, and are defined in terms of the variables in \code{.data}.
If multiple expressions are included, they are combined with the \code{&} operator.
Only rows for which all conditions evaluate to \code{TRUE} are kept.}
\item{.preserve}{Ignored}
}
\description{
This is a method for the dplyr \code{\link[dplyr:filter]{filter()}} generic. It is translated to
the \code{i} argument of \verb{[.data.table}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(mtcars)
dt \%>\% filter(cyl == 4)
dt \%>\% filter(vs, am)
dt \%>\%
group_by(cyl) \%>\%
filter(mpg > mean(mpg))
}
dtplyr/man/drop_na.dtplyr_step.Rd 0000644 0001762 0000144 00000001321 14021424751 016621 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-call.R
\name{drop_na.dtplyr_step}
\alias{drop_na.dtplyr_step}
\title{Drop rows containing missing values}
\usage{
\method{drop_na}{dtplyr_step}(data, ...)
}
\arguments{
\item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to inspect for
missing values.}
}
\description{
This is a method for the tidyr \code{drop_na()} generic. It is translated to
\code{data.table::na.omit()}
}
\examples{
library(dplyr)
library(tidyr)
dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b")))
dt \%>\% drop_na()
dt \%>\% drop_na(x)
vars <- "y"
dt \%>\% drop_na(x, any_of(vars))
}
dtplyr/man/summarise.dtplyr_step.Rd 0000644 0001762 0000144 00000003266 14126601265 017221 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-subset-summarise.R
\name{summarise.dtplyr_step}
\alias{summarise.dtplyr_step}
\title{Summarise each group to one row}
\usage{
\method{summarise}{dtplyr_step}(.data, ..., .groups = NULL)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Name-value pairs of summary
functions. The name will be the name of the variable in the result.
The value can be:
\itemize{
\item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
\item A vector of length \code{n}, e.g. \code{quantile()}.
\item A data frame, to add multiple columns from a single expression.
}}
\item{.groups}{\Sexpr[results=rd]{lifecycle::badge("experimental")} Grouping structure of the result.
\itemize{
\item "drop_last": dropping the last level of grouping. This was the
only supported option before version 1.0.0.
\item "drop": All levels of grouping are dropped.
\item "keep": Same grouping structure as \code{.data}.
}
When \code{.groups} is not specified, it defaults to "drop_last".
In addition, a message informs you of that choice, unless the result is ungrouped,
the option "dplyr.summarise.inform" is set to \code{FALSE},
or when \code{summarise()} is called from a function in a package.}
}
\description{
This is a method for the dplyr \code{\link[=summarise]{summarise()}} generic. It is translated to
the \code{j} argument of \verb{[.data.table}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(mtcars)
dt \%>\%
group_by(cyl) \%>\%
summarise(vs = mean(vs))
dt \%>\%
group_by(cyl) \%>\%
summarise(across(disp:wt, mean))
}
dtplyr/man/lazy_dt.Rd 0000644 0001762 0000144 00000005473 14007000430 014275 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-first.R
\name{lazy_dt}
\alias{lazy_dt}
\alias{tbl_dt}
\alias{grouped_dt}
\title{Create a "lazy" data.table for use with dplyr verbs}
\usage{
lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL)
}
\arguments{
\item{x}{A data table (or something that can be coerced to a data table).}
\item{name}{Optionally, supply a name to be used in generated expressions.
For expert use only.}
\item{immutable}{If \code{TRUE}, \code{x} is treated as immutable and will never
be modified by any code generated by dtplyr. Alternatively, you can set
\code{immutable = FALSE} to allow dtplyr to modify the input object.}
\item{key_by}{Set keys for data frame, using \code{\link[=select]{select()}} semantics (e.g.
\code{key_by = c(key1, key2)}).
This uses \code{\link[data.table:setkey]{data.table::setkey()}} to sort the table and build an index.
This will considerably improve performance for subsets, summaries, and
joins that use the keys.
See \code{vignette("datatable-keys-fast-subset")} for more details.}
}
\description{
A lazy data.table lazily captures the intent of dplyr verbs, only actually
performing computation when requested (with \code{\link[=collect]{collect()}}, \code{\link[=pull]{pull()}},
\code{\link[=as.data.frame]{as.data.frame()}}, \code{\link[data.table:as.data.table]{data.table::as.data.table()}}, or \code{\link[tibble:as_tibble]{tibble::as_tibble()}}).
This allows dtplyr to convert dplyr verbs into as few data.table expressions
as possible, which leads to a high performance translation.
See \code{vignette("translation")} for the details of the translation.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
# If you have a data.table, using it with any dplyr generic will
# automatically convert it to a lazy_dt object
dt <- data.table::data.table(x = 1:10, y = 10:1)
dt \%>\% filter(x == y)
dt \%>\% mutate(z = x + y)
# Note that dtplyr will avoid mutating the input data.table, so the
# previous translation includes an automatic copy(). You can avoid this
# with a manual call to lazy_dt()
dt \%>\%
lazy_dt(immutable = FALSE) \%>\%
mutate(z = x + y)
# If you have a data frame, you can use lazy_dt() to convert it to
# a data.table:
mtcars2 <- lazy_dt(mtcars)
mtcars2
mtcars2 \%>\% select(mpg:cyl)
mtcars2 \%>\% select(x = mpg, y = cyl)
mtcars2 \%>\% filter(cyl == 4) \%>\% select(mpg)
mtcars2 \%>\% select(mpg, cyl) \%>\% filter(cyl == 4)
mtcars2 \%>\% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
mtcars2 \%>\% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
mtcars2 \%>\% filter(cyl == 8) \%>\% mutate(cyl2 = cyl * 2)
# Learn more about translation in vignette("translation")
by_cyl <- mtcars2 \%>\% group_by(cyl)
by_cyl \%>\% summarise(mpg = mean(mpg))
by_cyl \%>\% mutate(mpg = mean(mpg))
by_cyl \%>\%
filter(mpg < mean(mpg)) \%>\%
summarise(hp = mean(hp))
}
dtplyr/man/intersect.dtplyr_step.Rd 0000644 0001762 0000144 00000002067 14006775461 017221 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-set.R
\name{intersect.dtplyr_step}
\alias{intersect.dtplyr_step}
\alias{union.dtplyr_step}
\alias{union_all.dtplyr_step}
\alias{setdiff.dtplyr_step}
\title{Set operations}
\usage{
\method{intersect}{dtplyr_step}(x, y, ...)
\method{union}{dtplyr_step}(x, y, ...)
\method{union_all}{dtplyr_step}(x, y, ...)
\method{setdiff}{dtplyr_step}(x, y, ...)
}
\arguments{
\item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.}
\item{...}{Ignored}
}
\description{
These are methods for the dplyr generics \code{\link[=intersect]{intersect()}}, \code{\link[=union]{union()}},
\code{\link[=union_all]{union_all()}}, and \code{\link[=setdiff]{setdiff()}}. They are translated to
\code{\link[data.table:setops]{data.table::fintersect()}}, \code{\link[data.table:setops]{data.table::funion()}}, and
\code{\link[data.table:setops]{data.table::fsetdiff()}}.
}
\examples{
dt1 <- lazy_dt(data.frame(x = 1:4))
dt2 <- lazy_dt(data.frame(x = c(2, 4, 6)))
intersect(dt1, dt2)
union(dt1, dt2)
setdiff(dt1, dt2)
}
dtplyr/man/group_modify.dtplyr_step.Rd 0000644 0001762 0000144 00000002552 14006775461 017723 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-modify.R
\name{group_modify.dtplyr_step}
\alias{group_modify.dtplyr_step}
\alias{group_map.dtplyr_step}
\title{Apply a function to each group}
\usage{
\method{group_modify}{dtplyr_step}(.tbl, .f, ..., keep = FALSE)
\method{group_map}{dtplyr_step}(.tbl, .f, ..., keep = FALSE)
}
\arguments{
\item{.tbl}{A \code{\link[=lazy_dt]{lazy_dt()}}}
\item{.f}{The name of a two argument function. The first argument is passed
\code{.SD}, the data.table representing the current group; the second argument
is passed \code{.BY}, a list giving the current values of the grouping
variables. The function should return a list or data.table.}
\item{...}{Additional arguments passed to \code{.f}}
\item{keep}{Not supported for \link{lazy_dt}.}
}
\value{
\code{group_map()} applies \code{.f} to each group, returning a list.
\code{group_modify()} replaces each group with the results of \code{.f}, returning a
modified \code{\link[=lazy_dt]{lazy_dt()}}.
}
\description{
These are methods for the dplyr \code{\link[=group_map]{group_map()}} and \code{\link[=group_modify]{group_modify()}} generics.
They are both translated to \verb{[.data.table}.
}
\examples{
library(dplyr)
dt <- lazy_dt(mtcars)
dt \%>\%
group_by(cyl) \%>\%
group_modify(head, n = 2L)
dt \%>\%
group_by(cyl) \%>\%
group_map(head, n = 2L)
}
dtplyr/man/relocate.dtplyr_step.Rd 0000644 0001762 0000144 00000002251 14031070705 016775 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-colorder-relocate.R
\name{relocate.dtplyr_step}
\alias{relocate.dtplyr_step}
\title{Relocate variables using their names}
\usage{
\method{relocate}{dtplyr_step}(.data, ..., .before = NULL, .after = NULL)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to move.}
\item{.before}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Destination of
columns selected by \code{...}. Supplying neither will move columns to the
left-hand side; specifying both is an error.}
\item{.after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Destination of
columns selected by \code{...}. Supplying neither will move columns to the
left-hand side; specifying both is an error.}
}
\description{
This is a method for the dplyr \code{\link[=relocate]{relocate()}} generic. It is translated to
the \code{j} argument of \verb{[.data.table}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
dt \%>\% relocate(z)
dt \%>\% relocate(y, .before = x)
dt \%>\% relocate(x, .after = y)
}
dtplyr/man/distinct.dtplyr_step.Rd 0000644 0001762 0000144 00000002206 14006775461 017035 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-call.R
\name{distinct.dtplyr_step}
\alias{distinct.dtplyr_step}
\title{Subset distinct/unique rows}
\usage{
\method{distinct}{dtplyr_step}(.data, ..., .keep_all = FALSE)
}
\arguments{
\item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
\item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Optional variables to use
when determining uniqueness. If there are multiple rows for a given
combination of inputs, only the first row will be preserved. If omitted,
will use all variables.}
\item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}.
If a combination of \code{...} is not distinct, this keeps the
first row of values.}
}
\description{
This is a method for the dplyr \code{\link[=distinct]{distinct()}} generic. It is translated to
\code{\link[data.table:duplicated]{data.table::unique.data.table()}}.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
df <- lazy_dt(data.frame(
x = sample(10, 100, replace = TRUE),
y = sample(10, 100, replace = TRUE)
))
df \%>\% distinct(x)
df \%>\% distinct(x, y)
df \%>\% distinct(x, .keep_all = TRUE)
}
dtplyr/man/left_join.dtplyr_step.Rd 0000644 0001762 0000144 00000005620 14126601265 017161 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/step-join.R
\name{left_join.dtplyr_step}
\alias{left_join.dtplyr_step}
\title{Join data tables}
\usage{
\method{left_join}{dtplyr_step}(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y"))
}
\arguments{
\item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.}
\item{...}{Other parameters passed onto methods.}
\item{by}{A character vector of variables to join by.
If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so that you
can check they're correct; suppress the message by supplying \code{by} explicitly.
To join by different variables on \code{x} and \code{y}, use a named vector.
For example, \code{by = c("a" = "b")} will match \code{x$a} to \code{y$b}.
To join by multiple variables, use a vector with length > 1.
For example, \code{by = c("a", "b")} will match \code{x$a} to \code{y$a} and \code{x$b} to
\code{y$b}. Use a named vector to match different variables in \code{x} and \code{y}.
For example, \code{by = c("a" = "b", "c" = "d")} will match \code{x$a} to \code{y$b} and
\code{x$c} to \code{y$d}.
To perform a cross-join, generating all combinations of \code{x} and \code{y},
use \code{by = character()}.}
\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}. This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}
\item{suffix}{If there are non-joined duplicate variables in \code{x} and
\code{y}, these suffixes will be added to the output to disambiguate them.
Should be a character vector of length 2.}
}
\description{
These are methods for the dplyr generics \code{\link[=left_join]{left_join()}}, \code{\link[=right_join]{right_join()}},
\code{\link[=inner_join]{inner_join()}}, \code{\link[=full_join]{full_join()}}, \code{\link[=anti_join]{anti_join()}}, and \code{\link[=semi_join]{semi_join()}}. Left, right,
inner, and anti join are translated to the \verb{[.data.table} equivalent,
full joins to \code{\link[data.table:merge]{data.table::merge.data.table()}}.
Left, right, and full joins are in some cases followed by calls to
\code{\link[data.table:setcolorder]{data.table::setcolorder()}} and \code{\link[data.table:setattr]{data.table::setnames()}} to ensure that column
order and names match dplyr conventions.
Semi-joins don't have a direct data.table equivalent.
}
\examples{
library(dplyr, warn.conflicts = FALSE)
band_dt <- lazy_dt(dplyr::band_members)
instrument_dt <- lazy_dt(dplyr::band_instruments)
band_dt \%>\% left_join(instrument_dt)
band_dt \%>\% right_join(instrument_dt)
band_dt \%>\% inner_join(instrument_dt)
band_dt \%>\% full_join(instrument_dt)
band_dt \%>\% semi_join(instrument_dt)
band_dt \%>\% anti_join(instrument_dt)
}
dtplyr/man/figures/ 0000755 0001762 0000144 00000000000 14004642135 014006 5 ustar ligges users dtplyr/man/figures/logo.png 0000644 0001762 0000144 00000136617 14004642135 015472 0 ustar ligges users PNG
IHDR ޫh gAMA a cHRM z&