dtplyr/0000755000176200001440000000000014172110611011562 5ustar liggesusersdtplyr/NAMESPACE0000644000176200001440000001121614172100676013014 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(anti_join,data.table) S3method(anti_join,dtplyr_step) S3method(arrange,data.table) S3method(arrange,dtplyr_step) S3method(as.data.frame,dtplyr_step) S3method(as.data.frame,foo) S3method(as.data.table,dtplyr_step) S3method(as_tibble,dtplyr_step) S3method(auto_copy,dtplyr_step) S3method(collect,dtplyr_step) S3method(compute,dtplyr_step) S3method(count,data.table) S3method(count,dtplyr_step) S3method(dim,dtplyr_step) S3method(dim,dtplyr_step_first) S3method(distinct,data.table) S3method(distinct,dtplyr_step) S3method(do,data.table) S3method(do,dtplyr_step) S3method(dt_call,dtplyr_step) S3method(dt_call,dtplyr_step_assign) S3method(dt_call,dtplyr_step_first) S3method(dt_call,dtplyr_step_join) S3method(dt_call,dtplyr_step_modify) S3method(dt_call,dtplyr_step_set) S3method(dt_call,dtplyr_step_subset) S3method(dt_has_computation,dtplyr_step) S3method(dt_has_computation,dtplyr_step_first) S3method(dt_has_computation,dtplyr_step_group) S3method(dt_sources,dtplyr_step) S3method(dt_sources,dtplyr_step_first) S3method(dt_sources,dtplyr_step_join) S3method(dt_sources,dtplyr_step_set) S3method(dt_sources,dtplyr_step_subset) S3method(full_join,data.table) S3method(full_join,dtplyr_step) S3method(glimpse,dtplyr_step) S3method(group_by,data.table) S3method(group_by,dtplyr_step) S3method(group_map,dtplyr_step) S3method(group_modify,dtplyr_step) S3method(group_size,dtplyr_step) S3method(group_vars,dtplyr_step) S3method(groups,dtplyr_step) S3method(head,dtplyr_step) S3method(inner_join,data.table) S3method(inner_join,dtplyr_step) S3method(left_join,data.table) S3method(left_join,dtplyr_step) S3method(mutate,data.table) S3method(mutate,dtplyr_step) S3method(n_groups,dtplyr_step) S3method(print,dtplyr_step) S3method(pull,dtplyr_step) S3method(relocate,data.table) S3method(relocate,dtplyr_step) S3method(rename,data.table) S3method(rename,dtplyr_step) S3method(rename_with,data.table) S3method(rename_with,dtplyr_step) S3method(right_join,data.table) S3method(right_join,dtplyr_step) S3method(same_src,dtplyr_step) S3method(sample_frac,data.table) S3method(sample_frac,dtplyr_step) S3method(sample_n,data.table) S3method(sample_n,dtplyr_step) S3method(select,data.table) S3method(select,dtplyr_step) S3method(semi_join,data.table) S3method(semi_join,dtplyr_step) S3method(show_query,dtplyr_step) S3method(slice,data.table) S3method(slice,dtplyr_step) S3method(slice_head,data.table) S3method(slice_head,dtplyr_step) S3method(slice_max,data.table) S3method(slice_max,dtplyr_step) S3method(slice_min,data.table) S3method(slice_min,dtplyr_step) S3method(slice_sample,data.table) S3method(slice_sample,dtplyr_step) S3method(slice_tail,data.table) S3method(slice_tail,dtplyr_step) S3method(summarise,data.table) S3method(summarise,dtplyr_step) S3method(tail,dtplyr_step) S3method(tally,data.table) S3method(tally,dtplyr_step) S3method(tbl_vars,dtplyr_step) S3method(tbl_vars,foo) S3method(transmute,data.table) S3method(transmute,dtplyr_step) S3method(ungroup,data.table) S3method(ungroup,dtplyr_step) S3method(union_all,data.table) S3method(union_all,dtplyr_step) S3method(unique,dtplyr_step) export(.datatable.aware) export(lazy_dt) import(rlang) importFrom(data.table,as.data.table) importFrom(data.table,data.table) importFrom(data.table,is.data.table) importFrom(dplyr,anti_join) importFrom(dplyr,arrange) importFrom(dplyr,auto_copy) 
importFrom(dplyr,collect) importFrom(dplyr,compute) importFrom(dplyr,count) importFrom(dplyr,distinct) importFrom(dplyr,do) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,glimpse) importFrom(dplyr,group_by) importFrom(dplyr,group_map) importFrom(dplyr,group_modify) importFrom(dplyr,group_size) importFrom(dplyr,group_vars) importFrom(dplyr,groups) importFrom(dplyr,inner_join) importFrom(dplyr,intersect) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n_groups) importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,rename) importFrom(dplyr,rename_with) importFrom(dplyr,right_join) importFrom(dplyr,same_src) importFrom(dplyr,sample_frac) importFrom(dplyr,sample_n) importFrom(dplyr,select) importFrom(dplyr,semi_join) importFrom(dplyr,setdiff) importFrom(dplyr,show_query) importFrom(dplyr,slice) importFrom(dplyr,slice_head) importFrom(dplyr,slice_max) importFrom(dplyr,slice_min) importFrom(dplyr,slice_sample) importFrom(dplyr,slice_tail) importFrom(dplyr,summarise) importFrom(dplyr,tally) importFrom(dplyr,tbl_vars) importFrom(dplyr,transmute) importFrom(dplyr,ungroup) importFrom(dplyr,union) importFrom(dplyr,union_all) importFrom(glue,glue) importFrom(lifecycle,deprecated) importFrom(tibble,as_tibble) importFrom(tidyselect,everything) importFrom(utils,head) importFrom(utils,tail) dtplyr/LICENSE0000644000176200001440000000005414004642135012573 0ustar liggesusersYEAR: 2020 COPYRIGHT HOLDER: dtplyr authors dtplyr/README.md0000644000176200001440000001110014152441602013037 0ustar liggesusers # dtplyr [![CRAN status](https://www.r-pkg.org/badges/version/dtplyr)](https://cran.r-project.org/package=dtplyr) [![R-CMD-check](https://github.com/tidyverse/dtplyr/workflows/R-CMD-check/badge.svg)](https://github.com/tidyverse/dtplyr/actions) [![Codecov test coverage](https://codecov.io/gh/tidyverse/dtplyr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dtplyr?branch=main) ## Overview dtplyr provides a [data.table](http://r-datatable.com/) backend for dplyr. The goal of dtplyr is to allow you to write dplyr code that is automatically translated to the equivalent, but usually much faster, data.table code. Compared to the previous release, this version of dtplyr is a complete rewrite that focusses only on lazy evaluation triggered by use of `lazy_dt()`. This means that no computation is performed until you explicitly request it with `as.data.table()`, `as.data.frame()` or `as_tibble()`. This has a considerable advantage over the previous version (which eagerly evaluated each step) because it allows dtplyr to generate significantly more performant translations. This is a large change that breaks all existing uses of dtplyr. But frankly, dtplyr was pretty useless before because it did such a bad job of generating data.table code. Fortunately few people used it, so a major overhaul was possible. See `vignette("translation")` for details of the current translations, and [table.express](https://github.com/asardaes/table.express) and [rqdatatable](https://github.com/WinVector/rqdatatable/) for related work. ## Installation You can install from CRAN with: ``` r install.packages("dtplyr") ``` Or try the development version from GitHub with: ``` r # install.packages("devtools") devtools::install_github("tidyverse/dtplyr") ``` ## Usage To use dtplyr, you must at least load dtplyr and dplyr. 
You may also want to load [data.table](http://r-datatable.com/) so you can access the other goodies that it provides: ``` r library(data.table) library(dtplyr) library(dplyr, warn.conflicts = FALSE) ``` Then use `lazy_dt()` to create a “lazy” data table that tracks the operations performed on it. ``` r mtcars2 <- lazy_dt(mtcars) ``` You can preview the transformation (including the generated data.table code) by printing the result: ``` r mtcars2 %>% filter(wt < 5) %>% mutate(l100k = 235.21 / mpg) %>% # liters / 100 km group_by(cyl) %>% summarise(l100k = mean(l100k)) #> Source: local data table [3 x 2] #> Call: `_DT1`[wt < 5][, `:=`(l100k = 235.21/mpg)][, .(l100k = mean(l100k)), #> keyby = .(cyl)] #> #> cyl l100k #> #> 1 4 9.05 #> 2 6 12.0 #> 3 8 14.9 #> #> # Use as.data.table()/as.data.frame()/as_tibble() to access results ``` But generally you should reserve this only for debugging, and use `as.data.table()`, `as.data.frame()`, or `as_tibble()` to indicate that you’re done with the transformation and want to access the results: ``` r mtcars2 %>% filter(wt < 5) %>% mutate(l100k = 235.21 / mpg) %>% # liters / 100 km group_by(cyl) %>% summarise(l100k = mean(l100k)) %>% as_tibble() #> # A tibble: 3 × 2 #> cyl l100k #> #> 1 4 9.05 #> 2 6 12.0 #> 3 8 14.9 ``` ## Why is dtplyr slower than data.table? There are three primary reasons that dtplyr will always be somewhat slower than data.table: - Each dplyr verb must do some work to convert dplyr syntax to data.table syntax. This takes time proportional to the complexity of the input code, not the input *data*, so should be a negligible overhead for large datasets. [Initial benchmarks](https://dtplyr.tidyverse.org/articles/translation.html#performance) suggest that the overhead should be under 1ms per dplyr call. - Some data.table expressions have no direct dplyr equivalent. For example, there’s no way to express cross- or rolling-joins with dplyr. - To match dplyr semantics, `mutate()` does not modify in place by default. This means that most expressions involving `mutate()` must make a copy that would not be necessary if you were using data.table directly. (You can opt out of this behaviour in `lazy_dt()` with `immutable = FALSE`). ## Code of Conduct Please note that the dtplyr project is released with a [Contributor Code of Conduct](https://dtplyr.tidyverse.org/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. dtplyr/man/0000755000176200001440000000000014172101033012333 5ustar liggesusersdtplyr/man/mutate.dtplyr_step.Rd0000644000176200001440000000263614126601265016513 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-mutate.R \name{mutate.dtplyr_step} \alias{mutate.dtplyr_step} \title{Create and modify columns} \usage{ \method{mutate}{dtplyr_step}(.data, ..., .before = NULL, .after = NULL) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\link[dplyr:dplyr_data_masking]{data-masking}> Name-value pairs. The name gives the name of the column in the output, and the value should evaluate to a vector.} \item{.before, .after}{\Sexpr[results=rd]{lifecycle::badge("experimental")} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns should appear (the default is to add to the right hand side). See \code{\link[=relocate]{relocate()}} for more details.} } \description{ This is a method for the dplyr \code{\link[=mutate]{mutate()}} generic. 
It is translated to the \code{j} argument of \verb{[.data.table}, using \verb{:=} to modify "in place". If \code{.before} or \code{.after} is provided, the new columns are relocated with a call to \code{\link[data.table:setcolorder]{data.table::setcolorder()}}. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(data.frame(x = 1:5, y = 5:1)) dt \%>\% mutate(a = (x + y) / 2, b = sqrt(x^2 + y^2)) # It uses a more sophisticated translation when newly created variables # are used in the same expression dt \%>\% mutate(x1 = x + 1, x2 = x1 + 1) } dtplyr/man/expand.dtplyr_step.Rd0000644000176200001440000000615114150760302016462 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-expand.R \name{expand.dtplyr_step} \alias{expand.dtplyr_step} \title{Expand data frame to include all possible combinations of values.} \usage{ \method{expand}{dtplyr_step}(data, ..., .name_repair = "check_unique") } \arguments{ \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{Specification of columns to expand. Columns can be atomic vectors or lists. \itemize{ \item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not present in the data, supply each variable as a separate argument: \code{expand(df, x, y, z)}. \item To find only the combinations that occur in the data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}. \item You can combine the two forms. For example, \code{expand(df, nesting(school_id, student_id), date)} would produce a row for each present school-student combination for all possible dates. } Unlike the data.frame method, this method does not use the full set of levels, just those that appear in the data. When used with continuous variables, you may need to fill in values that do not appear in the data: to do so use expressions like \code{year = 2010:2020} or \code{year = full_seq(year,1)}.} \item{.name_repair}{Treatment of problematic column names: \itemize{ \item \code{"minimal"}: No name repair or checks, beyond basic existence, \item \code{"unique"}: Make sure names are unique and not empty, \item \code{"check_unique"}: (default value), no name repair, but check they are \code{unique}, \item \code{"universal"}: Make the names \code{unique} and syntactic \item a function: apply custom name repair (e.g., \code{.name_repair = make.names} for names in the style of base R). \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}} } This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. See there for more details on these terms and the strategies used to enforce them.} } \description{ This is a method for the tidyr \code{expand()} generic. It is translated to \code{\link[data.table:J]{data.table::CJ()}}. } \examples{ library(tidyr) fruits <- lazy_dt(tibble( type = c("apple", "orange", "apple", "orange", "orange", "orange"), year = c(2010, 2010, 2012, 2010, 2010, 2012), size = factor( c("XS", "S", "M", "S", "S", "M"), levels = c("XS", "S", "M", "L") ), weights = rnorm(6, as.numeric(size) + 2) )) # All possible combinations --------------------------------------- # Note that only present levels of the factor variable `size` are retained. 
fruits \%>\% expand(type) fruits \%>\% expand(type, size) # This is different from the data frame behaviour: fruits \%>\% dplyr::collect() \%>\% expand(type, size) # Other uses ------------------------------------------------------- fruits \%>\% expand(type, size, 2010:2012) # Use `anti_join()` to determine which observations are missing all <- fruits \%>\% expand(type, size, year) all all \%>\% dplyr::anti_join(fruits) # Use with `right_join()` to fill in missing rows fruits \%>\% dplyr::right_join(all) } dtplyr/man/rename.dtplyr_step.Rd0000644000176200001440000000235214006775461016465 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-call.R \name{rename.dtplyr_step} \alias{rename.dtplyr_step} \alias{rename_with.dtplyr_step} \title{Rename columns using their names} \usage{ \method{rename}{dtplyr_step}(.data, ...) \method{rename_with}{dtplyr_step}(.data, .fn, .cols = everything(), ...) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} \item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use \code{new_name = old_name} to rename selected variables. For \code{rename_with()}: additional arguments passed onto \code{.fn}.} \item{.fn}{A function used to transform the selected \code{.cols}. Should return a character vector the same length as the input.} \item{.cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to rename; defaults to all columns.} } \description{ These are methods for the dplyr generics \code{\link[=rename]{rename()}} and \code{\link[=rename_with]{rename_with()}}. They are both translated to \code{\link[data.table:setattr]{data.table::setnames()}}. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3)) dt \%>\% rename(new_x = x, new_y = y) dt \%>\% rename_with(toupper) } dtplyr/man/filter.dtplyr_step.Rd0000644000176200001440000000176514006777613016503 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-filter.R \name{filter.dtplyr_step} \alias{filter.dtplyr_step} \title{Subset rows using column values} \usage{ \method{filter}{dtplyr_step}(.data, ..., .preserve = FALSE) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Expressions that return a logical value, and are defined in terms of the variables in \code{.data}. If multiple expressions are included, they are combined with the \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are kept.} \item{.preserve}{Ignored} } \description{ This is a method for the dplyr \code{\link[=filter]{filter()}} generic. It is translated to the \code{i} argument of \verb{[.data.table}. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(mtcars) dt \%>\% filter(cyl == 4) dt \%>\% filter(vs, am) dt \%>\% group_by(cyl) \%>\% filter(mpg > mean(mpg)) } dtplyr/man/drop_na.dtplyr_step.Rd0000644000176200001440000000132114021424751016621 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-call.R \name{drop_na.dtplyr_step} \alias{drop_na.dtplyr_step} \title{Drop rows containing missing values} \usage{ \method{drop_na}{dtplyr_step}(data, ...) } \arguments{ \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to inspect for missing values.} } \description{ This is a method for the tidyr \code{drop_na()} generic.
It is translated to \code{data.table::na.omit()}. } \examples{ library(dplyr) library(tidyr) dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b"))) dt \%>\% drop_na() dt \%>\% drop_na(x) vars <- "y" dt \%>\% drop_na(x, any_of(vars)) } dtplyr/man/summarise.dtplyr_step.Rd0000644000176200001440000000326614126601265017211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-summarise.R \name{summarise.dtplyr_step} \alias{summarise.dtplyr_step} \title{Summarise each group to one row} \usage{ \method{summarise}{dtplyr_step}(.data, ..., .groups = NULL) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Name-value pairs of summary functions. The name will be the name of the variable in the result. The value can be: \itemize{ \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. \item A vector of length \code{n}, e.g. \code{quantile()}. \item A data frame, to add multiple columns from a single expression. }} \item{.groups}{\Sexpr[results=rd]{lifecycle::badge("experimental")} Grouping structure of the result. \itemize{ \item "drop_last": dropping the last level of grouping. This was the only supported option before version 1.0.0. \item "drop": All levels of grouping are dropped. \item "keep": Same grouping structure as \code{.data}. } When \code{.groups} is not specified, it defaults to "drop_last". In addition, a message informs you of that choice, unless the result is ungrouped, the option "dplyr.summarise.inform" is set to \code{FALSE}, or when \code{summarise()} is called from a function in a package.} } \description{ This is a method for the dplyr \code{\link[=summarise]{summarise()}} generic. It is translated to the \code{j} argument of \verb{[.data.table}. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(mtcars) dt \%>\% group_by(cyl) \%>\% summarise(vs = mean(vs)) dt \%>\% group_by(cyl) \%>\% summarise(across(disp:wt, mean)) } dtplyr/man/lazy_dt.Rd0000644000176200001440000000547314007000430014275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-first.R \name{lazy_dt} \alias{lazy_dt} \alias{tbl_dt} \alias{grouped_dt} \title{Create a "lazy" data.table for use with dplyr verbs} \usage{ lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL) } \arguments{ \item{x}{A data table (or something that can be coerced to a data table).} \item{name}{Optionally, supply a name to be used in generated expressions. For expert use only.} \item{immutable}{If \code{TRUE}, \code{x} is treated as immutable and will never be modified by any code generated by dtplyr. Alternatively, you can set \code{immutable = FALSE} to allow dtplyr to modify the input object.} \item{key_by}{Set keys for data frame, using \code{\link[=select]{select()}} semantics (e.g. \code{key_by = c(key1, key2)}). This uses \code{\link[data.table:setkey]{data.table::setkey()}} to sort the table and build an index. This will considerably improve performance for subsets, summaries, and joins that use the keys.
See \code{vignette("datatable-keys-fast-subset")} for more details.} } \description{ A lazy data.table captures the intent of dplyr verbs, only actually performing computation when requested (with \code{\link[=collect]{collect()}}, \code{\link[=pull]{pull()}}, \code{\link[=as.data.frame]{as.data.frame()}}, \code{\link[data.table:as.data.table]{data.table::as.data.table()}}, or \code{\link[tibble:as_tibble]{tibble::as_tibble()}}). This allows dtplyr to convert dplyr verbs into as few data.table expressions as possible, which leads to a high performance translation. See \code{vignette("translation")} for the details of the translation. } \examples{ library(dplyr, warn.conflicts = FALSE) # If you have a data.table, using it with any dplyr generic will # automatically convert it to a lazy_dt object dt <- data.table::data.table(x = 1:10, y = 10:1) dt \%>\% filter(x == y) dt \%>\% mutate(z = x + y) # Note that dtplyr will avoid mutating the input data.table, so the # previous translation includes an automatic copy(). You can avoid this # with a manual call to lazy_dt() dt \%>\% lazy_dt(immutable = FALSE) \%>\% mutate(z = x + y) # If you have a data frame, you can use lazy_dt() to convert it to # a data.table: mtcars2 <- lazy_dt(mtcars) mtcars2 mtcars2 \%>\% select(mpg:cyl) mtcars2 \%>\% select(x = mpg, y = cyl) mtcars2 \%>\% filter(cyl == 4) \%>\% select(mpg) mtcars2 \%>\% select(mpg, cyl) \%>\% filter(cyl == 4) mtcars2 \%>\% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2) mtcars2 \%>\% transmute(cyl2 = cyl * 2, vs2 = vs * 2) mtcars2 \%>\% filter(cyl == 8) \%>\% mutate(cyl2 = cyl * 2) # Learn more about translation in vignette("translation") by_cyl <- mtcars2 \%>\% group_by(cyl) by_cyl \%>\% summarise(mpg = mean(mpg)) by_cyl \%>\% mutate(mpg = mean(mpg)) by_cyl \%>\% filter(mpg < mean(mpg)) \%>\% summarise(hp = mean(hp)) } dtplyr/man/intersect.dtplyr_step.Rd0000644000176200001440000000206714006775461017211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-set.R \name{intersect.dtplyr_step} \alias{intersect.dtplyr_step} \alias{union.dtplyr_step} \alias{union_all.dtplyr_step} \alias{setdiff.dtplyr_step} \title{Set operations} \usage{ \method{intersect}{dtplyr_step}(x, y, ...) \method{union}{dtplyr_step}(x, y, ...) \method{union_all}{dtplyr_step}(x, y, ...) \method{setdiff}{dtplyr_step}(x, y, ...) } \arguments{ \item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.} \item{...}{Ignored} } \description{ These are methods for the dplyr generics \code{\link[=intersect]{intersect()}}, \code{\link[=union]{union()}}, \code{\link[=union_all]{union_all()}}, and \code{\link[=setdiff]{setdiff()}}. They are translated to \code{\link[data.table:setops]{data.table::fintersect()}}, \code{\link[data.table:setops]{data.table::funion()}}, and \code{\link[data.table:setops]{data.table::fsetdiff()}}.
} \examples{ dt1 <- lazy_dt(data.frame(x = 1:4)) dt2 <- lazy_dt(data.frame(x = c(2, 4, 6))) intersect(dt1, dt2) union(dt1, dt2) setdiff(dt1, dt2) } dtplyr/man/group_modify.dtplyr_step.Rd0000644000176200001440000000255214006775461017723 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-modify.R \name{group_modify.dtplyr_step} \alias{group_modify.dtplyr_step} \alias{group_map.dtplyr_step} \title{Apply a function to each group} \usage{ \method{group_modify}{dtplyr_step}(.tbl, .f, ..., keep = FALSE) \method{group_map}{dtplyr_step}(.tbl, .f, ..., keep = FALSE) } \arguments{ \item{.tbl}{A \code{\link[=lazy_dt]{lazy_dt()}}} \item{.f}{The name of a two argument function. The first argument is passed \code{.SD},the data.table representing the current group; the second argument is passed \code{.BY}, a list giving the current values of the grouping variables. The function should return a list or data.table.} \item{...}{Additional arguments passed to \code{.f}} \item{keep}{Not supported for \link{lazy_dt}.} } \value{ \code{group_map()} applies \code{.f} to each group, returning a list. \code{group_modify()} replaces each group with the results of \code{.f}, returning a modified \code{\link[=lazy_dt]{lazy_dt()}}. } \description{ These are methods for the dplyr \code{\link[=group_map]{group_map()}} and \code{\link[=group_modify]{group_modify()}} generics. They are both translated to \verb{[.data.table}. } \examples{ library(dplyr) dt <- lazy_dt(mtcars) dt \%>\% group_by(cyl) \%>\% group_modify(head, n = 2L) dt \%>\% group_by(cyl) \%>\% group_map(head, n = 2L) } dtplyr/man/relocate.dtplyr_step.Rd0000644000176200001440000000225114031070705016775 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-colorder-relocate.R \name{relocate.dtplyr_step} \alias{relocate.dtplyr_step} \title{Relocate variables using their names} \usage{ \method{relocate}{dtplyr_step}(.data, ..., .before = NULL, .after = NULL) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to move.} \item{.before}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Destination of columns selected by \code{...}. Supplying neither will move columns to the left-hand side; specifying both is an error.} \item{.after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Destination of columns selected by \code{...}. Supplying neither will move columns to the left-hand side; specifying both is an error.} } \description{ This is a method for the dplyr \code{\link[=relocate]{relocate()}} generic. It is translated to the \code{j} argument of \verb{[.data.table}. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3)) dt \%>\% relocate(z) dt \%>\% relocate(y, .before = x) dt \%>\% relocate(y, .after = y) } dtplyr/man/distinct.dtplyr_step.Rd0000644000176200001440000000220614006775461017035 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-call.R \name{distinct.dtplyr_step} \alias{distinct.dtplyr_step} \title{Subset distinct/unique rows} \usage{ \method{distinct}{dtplyr_step}(.data, ..., .keep_all = FALSE) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} \item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Optional variables to use when determining uniqueness. If there are multiple rows for a given combination of inputs, only the first row will be preserved. 
If omitted, will use all variables.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} } \description{ This is a method for the dplyr \code{\link[=distinct]{distinct()}} generic. It is translated to \code{\link[data.table:duplicated]{data.table::unique.data.table()}}. } \examples{ library(dplyr, warn.conflicts = FALSE) df <- lazy_dt(data.frame( x = sample(10, 100, replace = TRUE), y = sample(10, 100, replace = TRUE) )) df \%>\% distinct(x) df \%>\% distinct(x, y) df \%>\% distinct(x, .keep_all = TRUE) } dtplyr/man/left_join.dtplyr_step.Rd0000644000176200001440000000562014126601265017161 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-join.R \name{left_join.dtplyr_step} \alias{left_join.dtplyr_step} \title{Join data tables} \usage{ \method{left_join}{dtplyr_step}(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y")) } \arguments{ \item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.} \item{...}{Other parameters passed onto methods.} \item{by}{A character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join by different variables on \code{x} and \code{y}, use a named vector. For example, \code{by = c("a" = "b")} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a vector with length > 1. For example, \code{by = c("a", "b")} will match \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. Use a named vector to match different variables in \code{x} and \code{y}. For example, \code{by = c("a" = "b", "c" = "d")} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, use \code{by = character()}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} } \description{ These are methods for the dplyr generics \code{\link[=left_join]{left_join()}}, \code{\link[=right_join]{right_join()}}, \code{\link[=inner_join]{inner_join()}}, \code{\link[=full_join]{full_join()}}, \code{\link[=anti_join]{anti_join()}}, and \code{\link[=semi_join]{semi_join()}}. Left, right, inner, and anti join are translated to the \verb{[.data.table} equivalent, full joins to \code{\link[data.table:merge]{data.table::merge.data.table()}}. Left, right, and full joins are in some cases followed by calls to \code{\link[data.table:setcolorder]{data.table::setcolorder()}} and \code{\link[data.table:setattr]{data.table::setnames()}} to ensure that column order and names match dplyr conventions. Semi-joins don't have a direct data.table equivalent. 
} \examples{ library(dplyr, warn.conflicts = FALSE) band_dt <- lazy_dt(dplyr::band_members) instrument_dt <- lazy_dt(dplyr::band_instruments) band_dt \%>\% left_join(instrument_dt) band_dt \%>\% right_join(instrument_dt) band_dt \%>\% inner_join(instrument_dt) band_dt \%>\% full_join(instrument_dt) band_dt \%>\% semi_join(instrument_dt) band_dt \%>\% anti_join(instrument_dt) } dtplyr/man/figures/0000755000176200001440000000000014004642135014006 5ustar liggesusersdtplyr/man/figures/logo.png0000644000176200001440000013661714004642135015470 0ustar liggesusers [binary PNG image: package logo (man/figures/logo.png); raw image data omitted]
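To make the `by` and `suffix` arguments documented in left_join.dtplyr_step.Rd above concrete, here is a minimal sketch; it is not part of the package's shipped examples, and the tables and column names are invented for illustration:

``` r
library(dtplyr)
library(dplyr, warn.conflicts = FALSE)

# Two lazy tables whose key columns have different names
orders_dt    <- lazy_dt(data.frame(id = c(1, 2, 3), qty = c(10, 20, 30)))
customers_dt <- lazy_dt(data.frame(cust_id = c(1, 3), name = c("a", "b")))

# by = c("id" = "cust_id") matches orders_dt$id to customers_dt$cust_id;
# suffix would disambiguate any non-joined duplicate column names
joined <- orders_dt %>%
  left_join(customers_dt, by = c("id" = "cust_id"), suffix = c(".x", ".y"))

# Printing previews the generated data.table call without computing it
joined

# Materialise the result when done
as_tibble(joined)
```

As with the other verbs, nothing is computed until `as_tibble()`, `as.data.table()`, or `as.data.frame()` is called.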
dtplyr/man/dtplyr-package.Rd0000644000176200001440000000172314172100676015550 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dtplyr-package.R \docType{package} \name{dtplyr-package} \alias{dtplyr} \alias{dtplyr-package} \title{dtplyr: Data Table Back-End for 'dplyr'} \description{ \if{html}{\figure{logo.png}{options: align='right' alt='logo' width='120'}} Provides a data.table backend for 'dplyr'. The goal of 'dtplyr' is to allow you to write 'dplyr' code that is automatically translated to the equivalent, but usually much faster, data.table code.
} \seealso{ Useful links: \itemize{ \item \url{https://dtplyr.tidyverse.org} \item \url{https://github.com/tidyverse/dtplyr} \item Report bugs at \url{https://github.com/tidyverse/dtplyr/issues} } } \author{ \strong{Maintainer}: Hadley Wickham \email{hadley@rstudio.com} Authors: \itemize{ \item Maximilian Girlich \item Mark Fairbanks \item Ryan Dickerson } Other contributors: \itemize{ \item RStudio [copyright holder, funder] } } \keyword{internal} dtplyr/man/pivot_longer.dtplyr_step.Rd0000644000176200001440000001137114021443044017710 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-call-pivot_longer.R \name{pivot_longer.dtplyr_step} \alias{pivot_longer.dtplyr_step} \title{Pivot data from wide to long} \usage{ \method{pivot_longer}{dtplyr_step}( data, cols, names_to = "name", names_prefix = NULL, names_sep = NULL, names_pattern = NULL, names_ptypes = NULL, names_transform = NULL, names_repair = "check_unique", values_to = "value", values_drop_na = FALSE, values_ptypes = NULL, values_transform = NULL, ... ) } \arguments{ \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to pivot into longer format.} \item{names_to}{A string specifying the name of the column to create from the data stored in the column names of \code{data}. Can be a character vector, creating multiple columns, if \code{names_sep} or \code{names_pattern} is provided. In this case, there are two special values you can take advantage of: \itemize{ \item \code{NA} will discard that component of the name. \item \code{.value} indicates that component of the name defines the name of the column containing the cell values, overriding \code{values_to}. }} \item{names_prefix}{A regular expression used to remove matching text from the start of each variable name.} \item{names_sep}{If \code{names_to} contains multiple values, these arguments control how the column name is broken up. \code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either be a numeric vector (specifying positions to break on), or a single string (specifying a regular expression to split on). \code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular expression containing matching groups (\verb{()}). If these arguments do not give you enough control, use \code{pivot_longer_spec()} to create a spec object and process manually as needed.} \item{names_pattern}{If \code{names_to} contains multiple values, these arguments control how the column name is broken up. \code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either be a numeric vector (specifying positions to break on), or a single string (specifying a regular expression to split on). \code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular expression containing matching groups (\verb{()}). If these arguments do not give you enough control, use \code{pivot_longer_spec()} to create a spec object and process manually as needed.} \item{names_ptypes, names_transform, values_ptypes, values_transform}{Not currently supported by dtplyr.} \item{names_repair}{What happens if the output has invalid column names? The default, \code{"check_unique"} is to error if the columns are duplicated. Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to de-duplicated by adding numeric suffixes. 
See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more options.} \item{values_to}{A string specifying the name of the column to create from the data stored in cell values. If \code{names_to} is a character containing the special \code{.value} sentinel, this value will be ignored, and the name of the value column will be derived from part of the existing column names.} \item{values_drop_na}{If \code{TRUE}, will drop rows that contain only \code{NA}s in the \code{values_to} column. This effectively converts explicit missing values to implicit missing values, and should generally be used only when missing values in \code{data} were created by its structure.} \item{...}{Additional arguments passed on to methods.} } \description{ This is a method for the tidyr \code{pivot_longer()} generic. It is translated to \code{\link[data.table:melt.data.table]{data.table::melt()}}. } \examples{ library(tidyr) # Simplest case where column names are character data relig_income_dt <- lazy_dt(relig_income) relig_income_dt \%>\% pivot_longer(!religion, names_to = "income", values_to = "count") # Slightly more complex case where columns have common prefix, # and missing values are structural so should be dropped. billboard_dt <- lazy_dt(billboard) billboard_dt \%>\% pivot_longer( cols = starts_with("wk"), names_to = "week", names_prefix = "wk", values_to = "rank", values_drop_na = TRUE ) # Multiple variables stored in column names lazy_dt(who) \%>\% pivot_longer( cols = new_sp_m014:newrel_f65, names_to = c("diagnosis", "gender", "age"), names_pattern = "new_?(.*)_(.)(.*)", values_to = "count" ) # Multiple observations per row anscombe_dt <- lazy_dt(anscombe) anscombe_dt \%>\% pivot_longer( everything(), names_to = c(".value", "set"), names_pattern = "(.)(.)" ) } dtplyr/man/collect.dtplyr_step.Rd0000644000176200001440000000320614013221715016624 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step.R \name{collect.dtplyr_step} \alias{collect.dtplyr_step} \alias{compute.dtplyr_step} \alias{as.data.table.dtplyr_step} \alias{as.data.frame.dtplyr_step} \alias{as_tibble.dtplyr_step} \title{Force computation of a lazy data.table} \usage{ \method{collect}{dtplyr_step}(x, ...) \method{compute}{dtplyr_step}(x, name = unique_name(), ...) \method{as.data.table}{dtplyr_step}(x, keep.rownames = FALSE, ...) \method{as.data.frame}{dtplyr_step}(x, ...) \method{as_tibble}{dtplyr_step}(x, ..., .name_repair = "check_unique") } \arguments{ \item{x}{A \link{lazy_dt}} \item{...}{Arguments used by other methods.} \item{name}{Name of intermediate data.table.} \item{keep.rownames}{Ignored as dplyr never preserves rownames.} \item{.name_repair}{Treatment of problematic column names} } \description{ \itemize{ \item \code{collect()} returns a tibble, grouped if needed. \item \code{compute()} generates an intermediate assignment in the translation. \item \code{as.data.table()} returns a data.table. \item \code{as.data.frame()} returns a data frame. \item \code{as_tibble()} returns a tibble.
} } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(mtcars) # Generate translation avg_mpg <- dt \%>\% filter(am == 1) \%>\% group_by(cyl) \%>\% summarise(mpg = mean(mpg)) # Show translation and temporarily compute result avg_mpg # compute and return tibble avg_mpg_tb <- as_tibble(avg_mpg) avg_mpg_tb # compute and return data.table avg_mpg_dt <- data.table::as.data.table(avg_mpg) avg_mpg_dt # modify translation to use intermediate assignment compute(avg_mpg) } dtplyr/man/replace_na.dtplyr_step.Rd0000644000176200001440000000217614021443044017275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/replace_na.R \name{replace_na.dtplyr_step} \alias{replace_na.dtplyr_step} \title{Replace NAs with specified values} \usage{ \method{replace_na}{dtplyr_step}(data, replace = list()) } \arguments{ \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{replace}{If \code{data} is a data frame, \code{replace} takes a list of values, with one value for each column that has \code{NA} values to be replaced. If \code{data} is a vector, \code{replace} takes a single value. This single value replaces all of the \code{NA} values in the vector.} } \description{ This is a method for the tidyr \code{replace_na()} generic. It is translated to \code{\link[data.table:coalesce]{data.table::fcoalesce()}}. Note that unlike \code{tidyr::replace_na()}, \code{data.table::fcoalesce()} cannot replace \code{NULL} values in lists. } \examples{ library(tidyr) # Replace NAs in a data frame dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b"))) dt \%>\% replace_na(list(x = 0, y = "unknown")) # Replace NAs using `dplyr::mutate()` dt \%>\% dplyr::mutate(x = replace_na(x, 0)) } dtplyr/man/pivot_wider.dtplyr_step.Rd0000644000176200001440000001041614021424751017537 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-call-pivot_wider.R \name{pivot_wider.dtplyr_step} \alias{pivot_wider.dtplyr_step} \title{Pivot data from long to wide} \usage{ \method{pivot_wider}{dtplyr_step}( data, id_cols = NULL, names_from = name, names_prefix = "", names_sep = "_", names_glue = NULL, names_sort = FALSE, names_repair = "check_unique", values_from = value, values_fill = NULL, values_fn = NULL, ... ) } \arguments{ \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{id_cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A set of columns that uniquely identifies each observation. Defaults to all columns in \code{data} except for the columns specified in \code{names_from} and \code{values_from}. Typically used when you have redundant variables, i.e. variables whose values are perfectly correlated with existing variables.} \item{names_from}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A pair of arguments describing which column (or columns) to get the name of the output column (\code{names_from}), and which column (or columns) to get the cell values from (\code{values_from}). If \code{values_from} contains multiple values, the value will be added to the front of the output column.} \item{names_prefix}{String added to the start of every variable name. 
This is particularly useful if \code{names_from} is a numeric vector and you want to create syntactic variable names.} \item{names_sep}{If \code{names_from} or \code{values_from} contains multiple variables, this will be used to join their values together into a single string to use as a column name.} \item{names_glue}{Instead of \code{names_sep} and \code{names_prefix}, you can supply a glue specification that uses the \code{names_from} columns (and special \code{.value}) to create custom column names.} \item{names_sort}{Should the column names be sorted? If \code{FALSE}, the default, column names are ordered by first appearance.} \item{names_repair}{What happens if the output has invalid column names? The default, \code{"check_unique"} is to error if the columns are duplicated. Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to de-duplicated by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more options.} \item{values_from}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> A pair of arguments describing which column (or columns) to get the name of the output column (\code{names_from}), and which column (or columns) to get the cell values from (\code{values_from}). If \code{values_from} contains multiple values, the value will be added to the front of the output column.} \item{values_fill}{Optionally, a (scalar) value that specifies what each \code{value} should be filled in with when missing. This can be a named list if you want to apply different aggregations to different value columns.} \item{values_fn}{A function, the default is \code{length()}. Note this is different behavior than \code{tidyr::pivot_wider()}, which returns a list column by default.} \item{...}{Additional arguments passed on to methods.} } \description{ This is a method for the tidyr \code{pivot_wider()} generic. It is translated to \code{\link[data.table:dcast.data.table]{data.table::dcast()}} } \examples{ library(tidyr) fish_encounters_dt <- lazy_dt(fish_encounters) fish_encounters_dt fish_encounters_dt \%>\% pivot_wider(names_from = station, values_from = seen) # Fill in missing values fish_encounters_dt \%>\% pivot_wider(names_from = station, values_from = seen, values_fill = 0) # Generate column names from multiple variables us_rent_income_dt <- lazy_dt(us_rent_income) us_rent_income_dt us_rent_income_dt \%>\% pivot_wider(names_from = variable, values_from = c(estimate, moe)) # When there are multiple `names_from` or `values_from`, you can use # use `names_sep` or `names_glue` to control the output variable names us_rent_income_dt \%>\% pivot_wider( names_from = variable, names_sep = ".", values_from = c(estimate, moe) ) # Can perform aggregation with values_fn warpbreaks_dt <- lazy_dt(as_tibble(warpbreaks[c("wool", "tension", "breaks")])) warpbreaks_dt warpbreaks_dt \%>\% pivot_wider( names_from = wool, values_from = breaks, values_fn = mean ) } dtplyr/man/select.dtplyr_step.Rd0000644000176200001440000000165614006775461016503 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-select.R \name{select.dtplyr_step} \alias{select.dtplyr_step} \title{Subset columns using their names} \usage{ \method{select}{dtplyr_step}(.data, ...) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted expressions separated by commas. 
Variable names can be used as if they were positions in the data frame, so expressions like \code{x:y} can be used to select a range of variables.} } \description{ This is a method for the dplyr \code{\link[=select]{select()}} generic. It is translated to the \code{j} argument of \verb{[.data.table}. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(data.frame(x1 = 1, x2 = 2, y1 = 3, y2 = 4)) dt \%>\% select(starts_with("x")) dt \%>\% select(ends_with("2")) dt \%>\% select(z1 = x1, z2 = x2) } dtplyr/man/nest.dtplyr_step.Rd0000644000176200001440000000312314126602474016160 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-nest.R \name{nest.dtplyr_step} \alias{nest.dtplyr_step} \title{Nest} \usage{ \method{nest}{dtplyr_step}(.data, ..., .names_sep = NULL, .key = deprecated()) } \arguments{ \item{.data}{A data frame.} \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest, specified using name-variable pairs of the form \code{new_col = c(col1, col2, col3)}. The right hand side can be any valid tidy select expression.} \item{.names_sep}{If \code{NULL}, the default, the names will be left as is. In \code{nest()}, inner names will come from the former outer names; in \code{unnest()}, the new outer names will come from the inner names. If a string, the inner and outer names will be used together. In \code{nest()}, the names of the new outer columns will be formed by pasting together the outer and the inner column names, separated by \code{names_sep}. In \code{unnest()}, the new inner names will have the outer names (+ \code{names_sep}) automatically stripped. This makes \code{names_sep} roughly symmetric between nesting and unnesting.} \item{.key}{Not supported.} \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} } \description{ This is a method for the tidyr \code{\link[tidyr:nest]{tidyr::nest()}} generic. It is translated using the non-nested variables in the \code{by} argument and \code{.SD} in the \code{j} argument. } \examples{ if (require("tidyr", quietly = TRUE)) { dt <- lazy_dt(tibble(x = c(1, 2, 1), y = c("a", "a", "b"))) dt \%>\% nest(data = y) dt \%>\% dplyr::group_by(x) \%>\% nest() } } dtplyr/man/group_by.dtplyr_step.Rd0000644000176200001440000000402514172101033017021 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-group.R \name{group_by.dtplyr_step} \alias{group_by.dtplyr_step} \alias{ungroup.dtplyr_step} \title{Group and ungroup} \usage{ \method{group_by}{dtplyr_step}(.data, ..., .add = FALSE, add = deprecated(), arrange = TRUE) \method{ungroup}{dtplyr_step}(.data, ...) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} \item{...}{In \code{group_by()}, variables or computations to group by. Computations are always done on the ungrouped data frame. To perform computations on the grouped data, you need to use a separate \code{mutate()} step before the \code{group_by()}. Computations are not allowed in \code{nest_by()}. In \code{ungroup()}, variables to remove from the grouping.} \item{.add, add}{When \code{FALSE}, the default, \code{group_by()} will override existing groups. To add to the existing groups, use \code{.add = TRUE}. This argument was previously called \code{add}, but that prevented creating a new grouping variable called \code{add}, and conflicts with our naming conventions.} \item{arrange}{If \code{TRUE}, will automatically arrange the output of subsequent grouped operations by group. 
If \code{FALSE}, output order will be left unchanged. In the generated data.table code this switches between using the \code{keyby} (\code{TRUE}) and \code{by} (\code{FALSE}) arguments.} } \description{ These are methods for dplyr's \code{\link[=group_by]{group_by()}} and \code{\link[=ungroup]{ungroup()}} generics. Grouping is translated to either the \code{keyby} or the \code{by} argument of \verb{[.data.table}, depending on the value of the \code{arrange} argument. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(mtcars) # group_by() is usually translated to `keyby` so that the groups # are ordered in the output dt \%>\% group_by(cyl) \%>\% summarise(mpg = mean(mpg)) # use `arrange = FALSE` to instead use `by` so the original order # of groups is preserved dt \%>\% group_by(cyl, arrange = FALSE) \%>\% summarise(mpg = mean(mpg)) } dtplyr/man/head.dtplyr_step.Rd0000644000176200001440000000155014006775461016116 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-call.R \name{head.dtplyr_step} \alias{head.dtplyr_step} \alias{tail.dtplyr_step} \title{Subset first or last rows} \usage{ \method{head}{dtplyr_step}(x, n = 6L, ...) \method{tail}{dtplyr_step}(x, n = 6L, ...) } \arguments{ \item{x}{A \code{\link[=lazy_dt]{lazy_dt()}}} \item{n}{Number of rows to select. Can use a negative number to instead drop rows from the other end.} \item{...}{Passed on to \code{\link[=head]{head()}}/\code{\link[=tail]{tail()}}.} } \description{ These are methods for the base generics \code{\link[=head]{head()}} and \code{\link[=tail]{tail()}}. They are not translated. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(data.frame(x = 1:10)) # first three rows head(dt, 3) # last three rows tail(dt, 3) # drop first three rows tail(dt, -3) } dtplyr/man/separate.dtplyr_step.Rd0000644000176200001440000000410314126601265017007 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-separate.R \name{separate.dtplyr_step} \alias{separate.dtplyr_step} \title{Separate a character column into multiple columns with a regular expression or numeric locations} \usage{ \method{separate}{dtplyr_step}( data, col, into, sep = "[^[:alnum:]]+", remove = TRUE, convert = FALSE, ... ) } \arguments{ \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{col}{Column name or position. This argument is passed by expression and supports quasiquotation (you can unquote column names or column positions).} \item{into}{Names of new variables to create as a character vector. Use \code{NA} to omit the variable in the output.} \item{sep}{Separator between columns. The default value is a regular expression that matches any sequence of non-alphanumeric values.} \item{remove}{If TRUE, remove the input column from the output data frame.} \item{convert}{If TRUE, will run type.convert() with as.is = TRUE on new columns. This is useful if the component columns are integer, numeric or logical. NB: this will cause string "NA"s to be converted to NAs.} \item{...}{Arguments passed on to methods} } \description{ This is a method for the \code{\link[tidyr:separate]{tidyr::separate()}} generic. It is translated to \code{\link[data.table:tstrsplit]{data.table::tstrsplit()}} in the \code{j} argument of \verb{[.data.table}.
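
To see the code that a given pipeline will generate, the translation can be printed with \code{dplyr::show_query()}. A minimal sketch (illustrative data only; the exact generated call may vary between dtplyr versions):

\preformatted{library(dplyr, warn.conflicts = FALSE)
library(tidyr)

df <- lazy_dt(data.frame(x = c(NA, "x.y", "x.z", "y.z")), "DT")

# Print the data.table translation (a tstrsplit() call in `j`)
# instead of computing the result
df \%>\% separate(x, c("A", "B")) \%>\% show_query()
}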
} \examples{ library(tidyr) # If you want to split by any non-alphanumeric value (the default): df <- lazy_dt(data.frame(x = c(NA, "x.y", "x.z", "y.z")), "DT") df \%>\% separate(x, c("A", "B")) # If you just want the second variable: df \%>\% separate(x, c(NA, "B")) # Use regular expressions to separate on multiple characters: df <- lazy_dt(data.frame(x = c(NA, "x?y", "x.z", "y:z")), "DT") df \%>\% separate(x, c("A","B"), sep = "([.?:])") # convert = TRUE detects column classes: df <- lazy_dt(data.frame(x = c("x:1", "x:2", "y:4", "z", NA)), "DT") df \%>\% separate(x, c("key","value"), ":") \%>\% str df \%>\% separate(x, c("key","value"), ":", convert = TRUE) \%>\% str } dtplyr/man/slice.dtplyr_step.Rd0000644000176200001440000000556014150760302016305 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-slice.R \name{slice.dtplyr_step} \alias{slice.dtplyr_step} \alias{slice_head.dtplyr_step} \alias{slice_tail.dtplyr_step} \alias{slice_min.dtplyr_step} \alias{slice_max.dtplyr_step} \title{Subset rows using their positions} \usage{ \method{slice}{dtplyr_step}(.data, ...) \method{slice_head}{dtplyr_step}(.data, ..., n, prop) \method{slice_tail}{dtplyr_step}(.data, ..., n, prop) \method{slice_min}{dtplyr_step}(.data, order_by, ..., n, prop, with_ties = TRUE) \method{slice_max}{dtplyr_step}(.data, order_by, ..., n, prop, with_ties = TRUE) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{Positive integers giving rows to select, or negative integers giving rows to drop.} \item{n, prop}{Provide either \code{n}, the number of rows, or \code{prop}, the proportion of rows to select. If neither are supplied, \code{n = 1} will be used. If a negative value of \code{n} or \code{prop} is provided, the specified number or proportion of rows will be removed. If \code{n} is greater than the number of rows in the group (or \code{prop > 1}), the result will be silently truncated to the group size. If the \code{prop}ortion of a group size does not yield an integer number of rows, the absolute value of \code{prop*n()} is rounded down.} \item{order_by}{Variable or function of variables to order by.} \item{with_ties}{Should ties be kept together? The default, \code{TRUE}, may return more rows than you request. Use \code{FALSE} to ignore ties, and return the first \code{n} rows.} } \description{ These are methods for the dplyr \code{\link[=slice]{slice()}}, \code{slice_head()}, \code{slice_tail()}, \code{slice_min()}, \code{slice_max()} and \code{slice_sample()} generics. They are translated to the \code{i} argument of \verb{[.data.table}. Unlike dplyr, \code{slice()} (and \code{slice()} alone) returns the same number of rows per group, regardless of whether or not the indices appear in each group. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(mtcars) dt \%>\% slice(1, 5, 10) dt \%>\% slice(-(1:4)) # First and last rows based on existing order dt \%>\% slice_head(n = 5) dt \%>\% slice_tail(n = 5) # Rows with minimum and maximum values of a variable dt \%>\% slice_min(mpg, n = 5) dt \%>\% slice_max(mpg, n = 5) # slice_min() and slice_max() may return more rows than requested # in the presence of ties. 
Use with_ties = FALSE to suppress dt \%>\% slice_min(cyl, n = 1) dt \%>\% slice_min(cyl, n = 1, with_ties = FALSE) # slice_sample() allows you to randomly select with or without replacement dt \%>\% slice_sample(n = 5) dt \%>\% slice_sample(n = 5, replace = TRUE) # you can optionally weight by a variable - this code weights by the # physical weight of the cars, so heavy cars are more likely to get # selected dt \%>\% slice_sample(weight_by = wt, n = 5) } dtplyr/man/arrange.dtplyr_step.Rd0000644000176200001440000000172714007004012016614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-arrange.R \name{arrange.dtplyr_step} \alias{arrange.dtplyr_step} \title{Arrange rows by column values} \usage{ \method{arrange}{dtplyr_step}(.data, ..., .by_group = FALSE) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Variables, or functions of variables. Use \code{\link[dplyr:desc]{desc()}} to sort a variable in descending order.} \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to grouped data frames only.} } \description{ This is a method for the dplyr generic \code{\link[=arrange]{arrange()}}. It is translated to an \code{\link[=order]{order()}} call in the \code{i} argument of \verb{[.data.table}. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(mtcars) dt \%>\% arrange(vs, cyl) dt \%>\% arrange(desc(vs), cyl) dt \%>\% arrange(across(mpg:disp)) } dtplyr/man/fill.dtplyr_step.Rd0000644000176200001440000000460614021424751016136 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fill.R \name{fill.dtplyr_step} \alias{fill.dtplyr_step} \title{Fill in missing values with previous or next value} \usage{ \method{fill}{dtplyr_step}(data, ..., .direction = c("down", "up", "downup", "updown")) } \arguments{ \item{data}{A data frame.} \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to fill.} \item{.direction}{Direction in which to fill missing values. Currently either "down" (the default), "up", "downup" (i.e. first down and then up) or "updown" (first up and then down).} } \description{ This is a method for the tidyr \code{fill()} generic. It is translated to \code{\link[data.table:nafill]{data.table::nafill()}}. Note that \code{data.table::nafill()} currently only works for integer and double columns.
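
As a quick check of the generated code, the translation can be printed with \code{dplyr::show_query()} rather than computed. A minimal sketch (illustrative data only; the exact generated call may vary between dtplyr versions):

\preformatted{library(dplyr, warn.conflicts = FALSE)
library(tidyr)

dt <- lazy_dt(data.frame(year = c(2000, NA, NA, 2001, NA)))

# Print the nafill()-based translation instead of computing the result
dt \%>\% fill(year) \%>\% show_query()
}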
} \examples{ library(tidyr) # Value (year) is recorded only when it changes sales <- lazy_dt(tibble::tribble( ~quarter, ~year, ~sales, "Q1", 2000, 66013, "Q2", NA, 69182, "Q3", NA, 53175, "Q4", NA, 21001, "Q1", 2001, 46036, "Q2", NA, 58842, "Q3", NA, 44568, "Q4", NA, 50197, "Q1", 2002, 39113, "Q2", NA, 41668, "Q3", NA, 30144, "Q4", NA, 52897, "Q1", 2004, 32129, "Q2", NA, 67686, "Q3", NA, 31768, "Q4", NA, 49094 )) # `fill()` defaults to replacing missing data from top to bottom sales \%>\% fill(year) # Value (n_squirrels) is missing above and below within a group squirrels <- lazy_dt(tibble::tribble( ~group, ~name, ~role, ~n_squirrels, 1, "Sam", "Observer", NA, 1, "Mara", "Scorekeeper", 8, 1, "Jesse", "Observer", NA, 1, "Tom", "Observer", NA, 2, "Mike", "Observer", NA, 2, "Rachael", "Observer", NA, 2, "Sydekea", "Scorekeeper", 14, 2, "Gabriela", "Observer", NA, 3, "Derrick", "Observer", NA, 3, "Kara", "Scorekeeper", 9, 3, "Emily", "Observer", NA, 3, "Danielle", "Observer", NA )) # The values are inconsistently missing by position within the group # Use .direction = "downup" to fill missing values in both directions squirrels \%>\% dplyr::group_by(group) \%>\% fill(n_squirrels, .direction = "downup") \%>\% dplyr::ungroup() # Using `.direction = "updown"` accomplishes the same goal in this example } dtplyr/man/count.dtplyr_step.Rd0000644000176200001440000000247714006775461016356 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/count.R \name{count.dtplyr_step} \alias{count.dtplyr_step} \title{Count observations by group} \usage{ \method{count}{dtplyr_step}(.data, ..., wt = NULL, sort = FALSE, name = NULL) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} \item{...}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Variables to group by.} \item{wt}{<\code{\link[dplyr:dplyr_data_masking]{data-masking}}> Frequency weights. Can be \code{NULL} or a variable: \itemize{ \item If \code{NULL} (the default), counts the number of rows in each group. \item If a variable, computes \code{sum(wt)} for each group. }} \item{sort}{If \code{TRUE}, will show the largest groups at the top.} \item{name}{The name of the new column in the output. If omitted, it will default to \code{n}. If there's already a column called \code{n}, it will error, and require you to specify the name.} } \description{ This is a method for the dplyr \code{\link[=count]{count()}} generic. It is translated using \code{.N} in the \code{j} argument, and supplying groups to \code{keyby} as appropriate. } \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(dplyr::starwars) dt \%>\% count(species) dt \%>\% count(species, sort = TRUE) dt \%>\% count(species, wt = mass, sort = TRUE) } dtplyr/man/transmute.dtplyr_step.Rd0000644000176200001440000000144314006775461017240 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/step-subset-transmute.R \name{transmute.dtplyr_step} \alias{transmute.dtplyr_step} \title{Create new columns, dropping old} \usage{ \method{transmute}{dtplyr_step}(.data, ...) } \arguments{ \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{<\link[dplyr:dplyr_data_masking]{data-masking}> Name-value pairs. The name gives the name of the column in the output, and the value should evaluate to a vector.} } \description{ This is a method for the dplyr \code{\link[=transmute]{transmute()}} generic. It is translated to the \code{j} argument of \verb{[.data.table}. 
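
The generated \code{j} expression can be inspected with \code{dplyr::show_query()}. A minimal sketch (reusing the example data below; the exact generated call may vary between dtplyr versions):

\preformatted{library(dplyr, warn.conflicts = FALSE)

dt <- lazy_dt(dplyr::starwars)

# Print the data.table translation (a `j` expression) instead of
# computing the result
dt \%>\% transmute(name, sh = paste0(species, "/", homeworld)) \%>\% show_query()
}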
} \examples{ library(dplyr, warn.conflicts = FALSE) dt <- lazy_dt(dplyr::starwars) dt \%>\% transmute(name, sh = paste0(species, "/", homeworld)) } dtplyr/man/complete.dtplyr_step.Rd0000644000176200001440000000351414031070705017012 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/complete.R \name{complete.dtplyr_step} \alias{complete.dtplyr_step} \title{Complete a data frame with missing combinations of data} \usage{ \method{complete}{dtplyr_step}(data, ..., fill = list()) } \arguments{ \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} \item{...}{Specification of columns to expand. Columns can be atomic vectors or lists. \itemize{ \item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not present in the data, supply each variable as a separate argument: \code{expand(df, x, y, z)}. \item To find only the combinations that occur in the data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}. \item You can combine the two forms. For example, \code{expand(df, nesting(school_id, student_id), date)} would produce a row for each present school-student combination for all possible dates. } When used with factors, \code{expand()} uses the full set of levels, not just those that appear in the data. If you want to use only the values seen in the data, use \code{forcats::fct_drop()}. When used with continuous variables, you may need to fill in values that do not appear in the data: to do so use expressions like \code{year = 2010:2020} or \code{year = full_seq(year,1)}.} \item{fill}{A named list that for each variable supplies a single value to use instead of \code{NA} for missing combinations.} } \description{ This is a method for the tidyr \code{complete()} generic. This is a wrapper around \code{dtplyr} translations for \code{expand()}, \code{full_join()}, and \code{replace_na()} that's useful for completing missing combinations of data. } \examples{ library(tidyr) tbl <- tibble(x = 1:2, y = 1:2, z = 3:4) dt <- lazy_dt(tbl) dt \%>\% complete(x, y) dt \%>\% complete(x, y, fill = list(z = 10L)) } dtplyr/DESCRIPTION0000644000176200001440000000260014172110611013266 0ustar liggesusersPackage: dtplyr Title: Data Table Back-End for 'dplyr' Version: 1.2.1 Authors@R: c( person("Hadley", "Wickham", , "hadley@rstudio.com", role = c("cre", "aut")), person("Maximilian", "Girlich", role = "aut"), person("Mark", "Fairbanks", role = "aut"), person("Ryan", "Dickerson", role = "aut"), person("RStudio", role = c("cph", "fnd")) ) Description: Provides a data.table backend for 'dplyr'. The goal of 'dtplyr' is to allow you to write 'dplyr' code that is automatically translated to the equivalent, but usually much faster, data.table code. 
License: MIT + file LICENSE URL: https://dtplyr.tidyverse.org, https://github.com/tidyverse/dtplyr BugReports: https://github.com/tidyverse/dtplyr/issues Depends: R (>= 3.3) Imports: crayon, data.table (>= 1.13.0), dplyr (>= 1.0.3), ellipsis, glue, lifecycle, rlang, tibble, tidyselect, vctrs Suggests: bench, covr, knitr, rmarkdown, testthat (>= 3.0.0), tidyr (>= 1.1.0) VignetteBuilder: knitr Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 Encoding: UTF-8 RoxygenNote: 7.1.2 NeedsCompilation: no Packaged: 2022-01-19 21:59:49 UTC; hadleywickham Author: Hadley Wickham [cre, aut], Maximilian Girlich [aut], Mark Fairbanks [aut], Ryan Dickerson [aut], RStudio [cph, fnd] Maintainer: Hadley Wickham <hadley@rstudio.com> Repository: CRAN Date/Publication: 2022-01-19 22:32:41 UTC dtplyr/build/0000755000176200001440000000000014172104724012671 5ustar liggesusersdtplyr/build/vignette.rds0000644000176200001440000000030714172104724015230 0ustar liggesusersdtplyr/build/dtplyr.pdf0000644000176200001440000044531414172104720014705 0ustar liggesusers
bE$L|ږ8^y>eSQc̯bV̯cNa'_OAJ195kd3EH@8ܰ%~As*=F 0`{RLPh33Y$LƹǬ oqMsȼ tx\ \cΜ-eksL ?"@>qhx ׷=l~1֍>*]!MBa endstream endobj 627 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTn0C6U@"mTt@;olvR3ތm~<&YվI|+œ;t羋<]3;Wj|{}[ mmᆂMv{Kt=c_~B?zxoBS6wBJ)X7UaMuSxHiQV,4$O;nC-bD/OCnC_n^ѻs׽9X2Z.ET~{~ʶrn_~߼h!R,6ew*ؔb%k e+Kӄ$a"1x*s.$S56P>Ƅm„A Fs 5577vرϾ+uaя6R:!,əCxg+ѧy*JcL|*m:fvuiWUꧏɩ\g%<Ϛ"sÖ0_:3x0kjhyIYx0aCnOg3$cx0<<v5O#ܵu7A 6*sZ ZcΜ-ܠeYksL ?"@>qh|tngk;dGGM@c endstream endobj 628 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTn0C6U@"mTt@;olvR3ތm~<&YվI|+œ;t羋<]3;Wj|{}[ mmᆂMv{Kt=cߚ~B?zxoBS6wBJ)X7UaMuSxHiQV,4$O;nC-bD/OCnC_n^ѻs׽9X2Z.ET~{~ʶrn_~߼h!R,6ew*ؔb%k e+Kӄ$a"1x*s.$S56P>Ƅm„A Fs 5577vرϾ+uaя6R:!,əCxg+ѧy*JcL|*m:fvuiWUꧏɩ\g%<Ϛ"sÖ0_:3x0kjhyIYx0aCnOg3$cx0<<v5O#ܵu7A 6*sZ ZcΜ-ܠeYksL ?"@>qh|tngk;dGGMc endstream endobj 629 0 obj << /Length 799 /Filter /FlateDecode >> stream xuUn@+HɁkc{!5&Q^ үル!zya/W7/~jyld}{9}N=C'u\W;oέO*k`~?''3Ɖt3\;WS]Q?SVk ]{9FSѤoG^ 32j$WC0h޼O~wC4Sy<&>U]Rn·ÛB~,{_=ڰfhm_}4zu|sH]Wb MLD!E!B FAC\dQQ(%T<#h^QqKƊL0cF F͌a._Q mPG9'+X38)+ι7\'~5:r%%Β뤧$1$܋a %aN*Atg&W̡`92/X[B|fAlI)dKdgI$[d$[H$[hv-|9~ddK%[w-t--d ~)BO)Rd dK|ɖNK)K)++Ζ]Rd]Oz͜|x8?<ᤥNO]?p@}_:h? endstream endobj 630 0 obj << /Length 550 /Filter /FlateDecode >> stream xmSˎ0+$1$#8h@Bܰp`~ +8*=SJ]sCM&ESݮ`w z\ħmbo'ޚr028~}uHXz_z.XA_`1o"xR:bct\$7҈٘TmH@ ]W0ywznͩV+1r]oś}X 6g1ͭnm{!^ ' bނP48YhC`୤\UY=0ZĎiơ 7([4r;"A"e"qDgs"2dK$#В%#KDDNs5&]J[/G endstream endobj 597 0 obj << /Type /ObjStm /N 100 /First 853 /Length 3683 /Filter /FlateDecode >> stream x[Fb>{R%@B ٺ,I&,ucR,e1&A8\}iWD@ tVMp/` 'T{hQVc p9lL R쑆1!† ,\ \, i]iZ }YWiT'/!$}*%/ QR_HF9,3yӵ%oBxݞ0;xGCWymw{|wt8 hzzqmqӭ1[urձNs71ѿ}t"£SDˠ; vh`~t|">SOeg',v~aM׶]t5K?v(Ulʪڢ<~9YE]I|Ez$̓Hb)>Z(xBAEC"iv}:_7~$y6}P\)v_5 :u\M=>G1}B)> 3^W5ooMhXU3,fthMRh6iimUk~HaĔN˼,b9BSU4OtN٧ΡJؔЛMZЌA?Мi]-h)-V+=.hIKXeeŵZA@ADY9|Uӏ* *tv:Cހi~3󤾡 mn4͟%]U1eOO/KZ'՟d`5 O]u}W={s7/ժ~ϜG ISn{K}rޭN9{<˪g).|ϺTJEo=)uQ.^,ۣT#6&x_`qݵ^QG/&#/d4 9 sQS3pTSty[Z*V3ZDVٷ?#r#˥*1*E}&z\KYy`63 .wL,,Ғz K&/I+.~/?<ǣʓ'+a9piP ~J< J?|bPCGlooSzɭdCja[;~\fFڃc󆉻ˉ`~!8 ِ\ut"c\xiE }a.»>]IO}XOof1b&2b:tFlLߑ) *獣juo(3Ð/ ˙p]@;:w}YW(|OVj>/kxWnU78H1#10FY1.;`l |f i#m@h3̀Y8zY}Mg=p>:pVc8+gY 81Ypr g8g9q倳Y8bY,|eg1,p>"3pc8sg813Ǚg63qX>cYc }&aIu{&R<]̋3(U~G'ܛNOBGf2ںI@A3vB|4Ubl5[zrn *y N{xݹ@rIִ2iZq 4;#Ѵ\, i6h݃@ rk{*Ow7=@DP!x6-b7g_'{؃lk~% C{ءMniaLr|w) hҪ62\HCi3~=WEV=iC> ʒ0y̳_&msun65$!8flvےm.W*nH;+u%nνhN?/b;U}[05Uҧ i(¸ m8 X"r_qvA+/<鯫r՚]D#uJ!E|3ؓNA%%$>ChC)ә*&Mf'WC!~AlBN/ҲH ?=q?'t(ʓ/, a#p{y?잹]m_ҺMZ*ZS;؆dK1AL´*:`gy!s4Wt_cSBP/'Bj2^g6 oAj'KHQ<4/*qDJ ngـ~iQ!"eLӽ\իx =qR6RF! g > endobj 650 0 obj << /Type /ObjStm /N 42 /First 372 /Length 1763 /Filter /FlateDecode >> stream xڕXMo6ď!iOA*뵱+Ϳ#h&7?> ԅЄh >G #Ȍc$49B@ڒS'bu019Qr^#^~ժK#DpCr 5 z'P̨bSZj./#V>R>s ':͞{ywqo+f;%(Cusf)f_ Ȩհ k{dW$min6o Bc/PhłE">4h\asւ*V5F遊F@2s[/GN?RH?IPRB['P"oІ\Qd&J#rk(Kӑ[3ezBP.\aC)Hq7̬WC(@Xzec6&u*Qu*df˼mSե. 
dtplyr/tests/0000755000176200001440000000000014172104725012735 5ustar liggesusersdtplyr/tests/testthat/0000755000176200001440000000000014172110611014564 5ustar liggesusersdtplyr/tests/testthat/test-step-subset-separate.R0000644000176200001440000000363414126601265021763 0ustar liggesuserstest_that("missing values in input are missing in output", { dt <- lazy_dt(tibble(x = c(NA, "a b")), "DT") step <- separate(dt, x, c("x", "y")) out <- collect(step) expect_equal( show_query(step), expr(copy(DT)[, `:=`(!!c("x", "y"), tstrsplit(x, split = "[^[:alnum:]]+"))]) ) expect_equal(out$x, c(NA, "a")) expect_equal(out$y, c(NA, "b")) }) test_that("convert produces integers etc", { dt <- lazy_dt(tibble(x = "1-1.5-FALSE"), "DT") step <- separate(dt, x, c("x", "y", "z"), "-", convert = TRUE) out <- collect(step) expect_equal( show_query(step), expr(copy(DT)[, `:=`(!!c("x", "y", "z"), tstrsplit(x, split = "-", type.convert = TRUE))]) ) expect_equal(out$x, 1L) expect_equal(out$y, 1.5) expect_equal(out$z, FALSE) }) test_that("overwrites existing columns", { dt <- lazy_dt(tibble(x = "a:b"), "DT") step <- dt %>% separate(x, c("x", "y")) out <- collect(step) expect_equal( show_query(step), expr(copy(DT)[, `:=`(!!c("x", "y"), tstrsplit(x, split = "[^[:alnum:]]+"))]) ) expect_equal(step$vars, c("x", "y")) expect_equal(out$x, "a") }) test_that("drops NA columns", { dt <- lazy_dt(tibble(x = c(NA, "a-b", "c-d")), "DT") step <- separate(dt, x, c(NA, "y"), "-") out <- collect(step) expect_equal(step$vars, "y") expect_equal(out$y, c(NA, "b", "d")) }) test_that("checks type of `into` and `sep`", { dt <- lazy_dt(tibble(x = "a:b"), "DT") expect_snapshot( separate(dt, x, "x", FALSE), error = TRUE ) expect_snapshot( separate(dt, x, FALSE), error = TRUE ) }) test_that("only copies when necessary", { dt <- tibble(x = paste(letters[1:3], letters[1:3], sep = "-"), y = 1:3) %>% lazy_dt("DT") step <- dt %>% filter(y < 4) %>% separate(x, into = c("left", "right"), sep = "-") expect_equal( show_query(step), expr(DT[y < 4][, `:=`(!!c("left", "right"), tstrsplit(x, split = "-"))][, .(y, left, right)]) ) }) dtplyr/tests/testthat/test-step-first.R0000644000176200001440000000335214021443044017771 0ustar liggesuserstest_that("constructor has sensible defaults", { dt <- data.table(x = 1:2, y = 1:2) step <- step_first(dt) expect_s3_class(step, "dtplyr_step_first") expect_equal(step$parent, dt) expect_equal(step$vars, c("x", "y")) expect_equal(step$groups, character()) expect_match(as.character(step$name), "_DT") }) # mutability -------------------------------------------------------------- test_that("doesn't need copy", { dt <- lazy_dt(mtcars) expect_false(dt$needs_copy) }) test_that("mutable object must be a data table", { expect_error(lazy_dt(mtcars, immutable = FALSE), "not already a data table") }) test_that("mutable object never needs copy", { dt <- lazy_dt(as.data.table(mtcars), immutable = FALSE) expect_false(dt$needs_copy) expect_false(dt %>% mutate(x = 1) %>%
.$needs_copy) }) test_that("dt_call() copies if requested", { dt <- lazy_dt(mtcars, name = "DT") expect_equal(dt_call(dt, FALSE), quote(DT)) expect_equal(dt_call(dt, TRUE), quote(copy(DT))) }) test_that("lazy_dt doesn't copy input", { dt <- data.table(x = 1) lz <- lazy_dt(dt) expect_equal(data.table::address(dt), data.table::address(lz$parent)) }) # keys -------------------------------------------------------------------- test_that("can set keys", { dt <- lazy_dt(mtcars, key_by = cyl) expect_equal(data.table::key(dt$parent), "cyl") }) test_that("setting doesn't modify data.table", { dt1 <- data.table(x = c(5, 1, 2)) dt2 <- lazy_dt(dt1, key_by = x) expect_equal(data.table::key(dt1$parent), NULL) expect_equal(data.table::key(dt2$parent), "x") }) # groups ------------------------------------------------------------------ test_that("keeps groups", { dt <- lazy_dt(group_by(mtcars, cyl)) expect_equal(group_vars(dt), "cyl") }) dtplyr/tests/testthat/helpers-library.R0000644000176200001440000000011614013221715020013 0ustar liggesuserslibrary(dplyr, warn.conflicts = FALSE) library(tidyr, warn.conflicts = FALSE) dtplyr/tests/testthat/test-complete.R0000644000176200001440000000125714031070705017504 0ustar liggesuserstest_that("complete with no variables returns data as is", { mtcars_dt <- lazy_dt(mtcars, "DT") expect_equal(complete(mtcars_dt), mtcars_dt) }) test_that("basic invocation works", { tbl <- tibble(x = 1:2, y = 1:2, z = 3:4) dt <- lazy_dt(tbl, "DT") out <- dt %>% complete(x, y) %>% collect() expect_equal(nrow(out), 4) expect_equal(out$z, c(3, NA, NA, 4)) }) test_that("empty expansion returns original", { tbl <- tibble(x = character()) dt <- lazy_dt(tbl, "DT") out <- dt %>% complete(y = NULL) %>% collect() expect_equal(out, tbl) tbl <- tibble(x = 1:4) dt <- lazy_dt(tbl, "DT") out <- dt %>% complete(y = NULL) %>% collect() expect_equal(out, tbl) }) dtplyr/tests/testthat/test-step.R0000644000176200001440000000425714126601265016660 0ustar liggesuserstest_that("tbl metadata as expected", { dt <- lazy_dt(data.table(x = c(1, 1, 1, 2, 2, 3)), "DT") expect_equal(dim(dt), c(6, 1)) expect_equal(as.character(tbl_vars(dt)), "x") expect_equal(show_query(dt), expr(DT)) }) test_that("group metadata as expected", { dt <- lazy_dt(data.table(x = c(1, 1, 1, 2, 2, 3))) expect_equal(group_vars(dt), character()) expect_equal(groups(dt), list()) expect_equal(group_size(dt), 6) expect_equal(n_groups(dt), 1) gt <- group_by(dt, x) expect_equal(group_vars(gt), c("x")) expect_equal(groups(gt), syms("x")) expect_equal(group_size(gt), c(3, 2, 1)) expect_equal(n_groups(gt), 3) }) test_that("has useful display methods", { expect_snapshot({ dt <- lazy_dt(mtcars, "DT") dt dt %>% group_by(vs, am) dt %>% mutate(y = 10) %>% compute("DT2") }) }) test_that("can evaluate to any data frame type", { dt <- lazy_dt(mtcars, "DT") expect_identical(class(as.data.frame(dt)), "data.frame") expect_s3_class(as.data.table(dt), "data.table") expect_s3_class(as_tibble(dt), "tbl_df") expect_s3_class(collect(dt), "tbl_df") }) test_that("compute returns lazy_dt", { dt <- lazy_dt(mtcars, "DT") dt <- summarise(dt, n = n()) dt2 <- compute(dt) expect_s3_class(dt2, "dtplyr_step") expect_equal(as.character(tbl_vars(dt2)), "n") }) test_that("collect and compute return grouped data", { dt <- group_by(lazy_dt(data.table(x = 1, y = 1), "DT"), x) expect_equal(dt %>% compute() %>% group_vars(), "x") expect_equal(dt %>% collect() %>% group_vars(), "x") }) # pull() ------------------------------------------------------------------ test_that("pull default 
extracts last var from data frame", { df <- lazy_dt(tibble(x = 1:10, y = 1:10), "DT") expect_equal(pull(df), 1:10) }) test_that("can extract by name, or positive/negative position", { x <- 1:10 df <- lazy_dt(tibble(x = x, y = runif(10)), "DT") expect_equal(pull(df, x), x) expect_equal(pull(df, 1), x) expect_equal(pull(df, -2L), x) }) test_that("can extract named vectors", { x <- 1:10 y <- letters[x] df <- lazy_dt(tibble(x = x, y = y), "DT") xn <- set_names(x, y) expect_equal(pull(df, x, y), xn) expect_equal(pull(df, 1, 2), xn) }) dtplyr/tests/testthat/test-step-subset-transmute.R0000644000176200001440000000422614126601265022177 0ustar liggesuserstest_that("simple calls generate expected translations", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% transmute(x) %>% show_query(), expr(DT[, .(x = x)]) ) }) test_that("transmute generates compound expression if needed", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(), expr(DT[, { x2 <- x * 2 x4 <- x2 * 2 .(x2, x4) }]) ) }) test_that("allows multiple assignment to the same variable", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% transmute(x = x * 2, x = x * 2) %>% show_query(), expr(DT[, { x <- x * 2 x <- x * 2 .(x) }]) ) }) test_that("groups are respected", { dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(y = 2) expect_equal(dt$vars, c("x", "y")) expect_equal( dt %>% show_query(), expr(DT[, .(y = 2), keyby = .(x)]) ) }) test_that("grouping vars can be transmuted", { dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = 2) expect_equal(dt$vars, c("x", "y")) expect_equal(dt$groups, "x") expect_equal( dt %>% show_query(), expr(copy(DT)[, `:=`(x = x + 1)][, .(y = 2), keyby = .(x)]) ) skip("transmuting grouping vars with nesting is not supported") dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = x + 1, x = y + 1) expect_equal(dt$vars, c("x", "y")) expect_equal( dt %>% collect(), tibble(x = 4, y = 3) %>% group_by(x) ) }) test_that("empty transmute works", { dt <- lazy_dt(data.frame(x = 1), "DT") expect_equal(transmute(dt) %>% show_query(), expr(DT[, 0L])) expect_equal(transmute(dt)$vars, character()) expect_equal(transmute(dt, !!!list()) %>% show_query(), expr(DT[, 0L])) dt_grouped <- lazy_dt(data.frame(x = 1), "DT") %>% group_by(x) expect_equal(transmute(dt_grouped)$vars, "x") }) test_that("only transmuting groups works", { dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x) expect_equal(transmute(dt, x) %>% collect(), dt %>% collect()) expect_equal(transmute(dt, x)$vars, "x") }) dtplyr/tests/testthat/test-step-subset-expand.R0000644000176200001440000000564514126601265021442 0ustar liggesuserstest_that("expand completes all values", { tbl <- tibble(x = 1:2, y = 1:2) dt <- lazy_dt(tbl, "DT") step <- expand(dt, x, y) out <- collect(step) expect_equal( show_query(step), expr(DT[, CJ(x = x, y = y, unique = TRUE)]) ) expect_equal(step$vars, c("x", "y")) expect_equal(nrow(out), 4) }) test_that("multiple variables in one arg doesn't expand", { tbl <- tibble(x = 1:2, y = 1:2) dt <- lazy_dt(tbl, "DT") step <- expand(dt, c(x, y)) out <- collect(step) expect_equal(nrow(out), 2) }) test_that("works with unnamed vectors", { tbl <- tibble(x = 1:2, y = 1:2) dt <- lazy_dt(tbl, "DT") step <- expand(dt, x, 1:2) out <- collect(step) expect_equal( show_query(step), expr(DT[, CJ(x = x, V2 = 1:2, unique = TRUE)]) ) expect_equal(step$vars, c("x", "V2")) 
expect_equal(nrow(out), 4) }) test_that("works with named vectors", { tbl <- tibble(x = 1:2, y = 1:2) dt <- lazy_dt(tbl, "DT") step <- expand(dt, x, val = 1:2) out <- collect(step) expect_equal( show_query(step), expr(DT[, CJ(x = x, val = 1:2, unique = TRUE)]) ) expect_equal(step$vars, c("x", "val")) expect_equal(nrow(out), 4) }) test_that("expand respects groups", { tbl <- tibble( a = c(1L, 1L, 2L), b = c(1L, 2L, 1L), c = c(2L, 1L, 1L) ) dt <- lazy_dt(tbl, "DT") step <- dt %>% group_by(c) %>% expand(a, b) out <- collect(step) expect_equal( show_query(step), expr(DT[, CJ(a = a, b = b, unique = TRUE), keyby = .(c)]) ) expect_equal(step$vars, c("c", "a", "b")) expect_equal(out$a, c(1, 1, 2, 2, 1)) expect_equal(out$b, c(1, 2, 1, 2, 1)) }) test_that("expand handles group variables as arguments", { dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3), "DT") # single group var, not redefined res <- dt %>% group_by(x) %>% expand(x, y) expect_equal( show_query(res), expr(DT[, CJ(x = x, y = y, unique = TRUE), keyby = .(x)][, `:=`("x", NULL)]) ) expect_equal( res$groups, "x" ) # multiple group vars, not redefined res <- dt %>% group_by(x, y) %>% expand(x, y, z) expect_equal( show_query(res), expr(DT[, CJ(x = x, y = y, z = z, unique = TRUE), keyby = .(x, y) ][, !!expr(!!c("x", "y") := NULL)]) ) expect_equal( res$groups, c("x", "y") ) # redefined group var res <- dt %>% group_by(x) %>% expand(x = 5, y) expect_equal( show_query(res), expr(DT[, CJ(x = 5, y = y, unique = TRUE), keyby = .(x)][, `:=`("x", NULL)]) ) expect_equal( res$groups, c("x") ) expect_equal( as_tibble(res), tibble(x = 5, y = 2) ) }) test_that("NULL inputs", { tbl <- tibble(x = 1:5) dt <- lazy_dt(tbl, "DT") step <- expand(dt, x, y = NULL) out <- collect(step) expect_equal(out, tbl) }) test_that("expand respects .name_repair", { dt <- lazy_dt(tibble(x = 1:2), "DT") suppressMessages( expect_named(dt %>% expand(x, x, .name_repair = "unique") %>% collect(), c("x...1", "x...2")) ) }) dtplyr/tests/testthat/test-tidyeval-across.R0000644000176200001440000001042714126601265021012 0ustar liggesuserstest_that("across() translates NULL", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_equal( capture_across(dt, across(a:b)), list(a = expr(a), b = expr(b)) ) }) test_that("across() drops groups", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_equal( capture_across(group_by(dt, a), across(everything())), list(b = expr(b)) ) expect_equal( capture_across(group_by(dt, b), across(everything())), list(a = expr(a)) ) }) test_that("across() translates character vectors", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_equal( capture_across(dt, across(a:b, "log")), exprs(a = log(a), b = log(b)) ) expect_equal( capture_across(dt, across(a:b, "log", base = 2)), exprs(a = log(a, base = 2), b = log(b, base = 2)) ) expect_equal( capture_across(dt, across(a, c("log", "exp"))), exprs(a_log = log(a), a_exp = exp(a)) ) }) test_that("across() translates functions", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_equal( capture_across(dt, across(a:b, log)), exprs(a = log(a), b = log(b)) ) expect_equal( capture_across(dt, across(a:b, log, base = 2)), exprs(a = log(a, base = 2), b = log(b, base = 2)) ) expect_equal( capture_across(dt, across(a, list(log, exp))), exprs(a_log = log(a), a_exp = exp(a)) ) }) test_that("across() captures anonymous functions", { dt <- lazy_dt(data.frame(a = 1)) expect_equal( capture_across(dt, across(a, function(x) log(x))), list(a = call2(function(x) log(x), quote(a))) ) }) test_that("dots are translated too", { fun <- function() { dt <- 
lazy_dt(data.frame(a = 1, b = 2)) z <- TRUE capture_across(dt, across(a, mean, na.rm = z)) } expect_equal(fun(), exprs(a = mean(a, na.rm = TRUE))) }) test_that("across() translates formulas", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_equal( capture_across(dt, across(a:b, ~ log(.x))), exprs(a = log(a), b = log(b)) ) # and recursively translates expect_equal( capture_across(dt, across(a, ~ .x / n())), exprs(a = a / .N) ) expect_equal( capture_across(dt, across(a:b, ~2)), exprs(a = 2, b = 2) ) expect_equal( capture_across(dt, across(a:b, list(~log(.x)))), exprs(a = log(a), b = log(b)) ) }) test_that("across() gives informative errors", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_snapshot(error = TRUE, { capture_across(dt, across(a, 1)) capture_across(dt, across(a, list(1))) }) }) test_that("across() can use named selections", { dt <- lazy_dt(data.frame(x = 1, y = 2)) # no fns expect_equal( capture_across(dt, across(c(a = x, b = y))), list(a = quote(x), b = quote(y)) ) expect_equal( capture_across(dt, across(all_of(c(a = "x", b = "y")))), list(a = quote(x), b = quote(y)) ) # one fn expect_equal( capture_across(dt, across(c(a = x, b = y), mean)), list(a = quote(mean(x)), b = quote(mean(y))) ) expect_equal( capture_across(dt, across(all_of(c(a = "x", b = "y")), mean)), list(a = quote(mean(x)), b = quote(mean(y))) ) # multiple fns expect_equal( capture_across(dt, across(c(a = x, b = y), list(mean, nm = sum))), list( a_mean = quote(mean(x)), a_nm = quote(sum(x)), b_mean = quote(mean(y)), b_nm = quote(sum(y)) ) ) expect_equal( capture_across(dt, across(all_of(c(a = "x", b = "y")), list(mean, nm = sum))), list( a_mean = quote(mean(x)), a_nm = quote(sum(x)), b_mean = quote(mean(y)), b_nm = quote(sum(y)) ) ) }) test_that("across() can handle empty selection", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% mutate(across(character(), c)) %>% show_query(), expr(copy(DT)[, .SD]) ) }) # if_all ------------------------------------------------------------------ test_that("if_all translations names, strings, and formulas", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_equal(capture_if_all(dt, if_all(a, is.na)), expr(is.na(a))) expect_equal(capture_if_all(dt, if_all(a, "is.na")), expr(is.na(a))) expect_equal(capture_if_all(dt, if_all(a, ~ is.na(.))), expr(is.na(a))) }) test_that("if_all collapses multiple expresions", { dt <- lazy_dt(data.frame(a = 1, b = 2)) expect_equal(capture_if_all(dt, if_all(everything(), is.na)), expr(is.na(a) & is.na(b))) }) dtplyr/tests/testthat/test-fill.R0000644000176200001440000000204214015730574016624 0ustar liggesuserstest_that("missings are filled correctly & translations are correct", { tbl <- tibble(x = c(NA, 1, NA, 2, NA, NA)) dt <- lazy_dt(tbl, "DT") step <- fill(dt, x) expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(x, "locf"))])) expect_equal(collect(step)$x, c(NA, 1, 1, 2, 2, 2)) step <- fill(dt, x, .direction = "up") expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(x, "nocb"))])) expect_equal(collect(step)$x, c(1, 1, 2, 2, NA, NA)) step <- fill(dt, x, .direction = 'downup') expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(nafill(x, "locf"), "nocb"))])) expect_equal(collect(step)$x, c(1, 1, 1, 2, 2, 2)) step <- fill(dt, x, .direction = 'updown') expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(nafill(x, "nocb"), "locf"))])) expect_equal(collect(step)$x, c(1, 1, 2, 2, 2, 2)) }) test_that("auto-conversion to lazy_dt works as intended", { dt <- data.table(x = c(NA, 1, NA, 2, NA, 
NA)) out <- collect(fill(dt, x)) expect_equal(out$x, c(NA, 1, 1, 2, 2, 2)) }) dtplyr/tests/testthat/test-step-set.R0000644000176200001440000000273214126601265017445 0ustar liggesuserstest_that("basic ops generate expected translation", { dt1 <- lazy_dt(data.frame(x = 1:3), "dt1") dt2 <- lazy_dt(data.frame(x = 2L), "dt2") expect_equal( dt1 %>% intersect(dt2) %>% show_query(), expr(fintersect(dt1, dt2)) ) expect_equal( dt1 %>% union(dt2) %>% show_query(), expr(funion(dt1, dt2)) ) expect_equal( dt1 %>% union_all(dt2) %>% show_query(), expr(funion(dt1, dt2, all = TRUE)) ) expect_equal( dt1 %>% setdiff(dt2) %>% show_query(), expr(fsetdiff(dt1, dt2)) ) }) test_that("basic ops work with data.table inputs", { dt1 <- data.table(x = 1:3) dt2 <- data.table(x = 2L) expect_equal( dt1 %>% intersect(dt2) %>% collect(), tibble(x = 2) ) expect_equal( dt1 %>% union(dt2) %>% collect(), tibble(x = 1:3) ) expect_equal( dt1 %>% union_all(dt2) %>% collect(), tibble(x = c(1:3, 2)) ) expect_equal( dt1 %>% setdiff(dt2) %>% collect(), tibble(x = c(1, 3)) ) }) test_that("joins captures locals from both parents", { dt1 <- lazy_dt(data.frame(x = 1)) %>% mutate(y = 1) %>% compute("D1") dt2 <- lazy_dt(data.frame(x = 1)) %>% mutate(z = 1) %>% compute("D2") expect_named(intersect(dt1, dt2)$locals, c("D1", "D2")) }) test_that("vars set correctly", { # data.table functions require the inputs to have same columns dt1 <- lazy_dt(data.frame(x = 1, y = 2), "dt1") dt2 <- lazy_dt(data.frame(x = 2, y = 2), "dt2") expect_equal(dt1 %>% union(dt2) %>% .$vars, c("x", "y")) }) dtplyr/tests/testthat/test-step-subset.R0000644000176200001440000000144414006775461020165 0ustar liggesuserstest_that("construtor has sensible defaults", { first <- step_first(data.table(x = 1), "DT") step <- step_subset(first) expect_s3_class(step, "dtplyr_step_subset") expect_equal(step$parent, first) expect_equal(step$vars, "x") expect_equal(step$groups, character()) expect_equal(step$i, NULL) expect_equal(step$j, NULL) }) test_that("generates expected calls", { first <- lazy_dt(data.table(x = 1), "DT") ungrouped <- step_subset(first, i = quote(i), j = quote(j)) expect_equal(dt_call(ungrouped), expr(DT[i, j])) with_i <- step_subset(first, i = quote(i), j = quote(j), groups = "x") expect_equal(dt_call(with_i), expr(DT[i, j, keyby = .(x)])) without_i <- step_subset(first, j = quote(j), groups = "x") expect_equal(dt_call(without_i), expr(DT[, j, keyby = .(x)])) }) dtplyr/tests/testthat/test-count.R0000644000176200001440000000446614126601265017037 0ustar liggesusers test_that("can be used grouped or ungrouped", { dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT") expect_equal( dt %>% count(x) %>% collect(), tibble(x = c(1, 2), n = c(3, 1)) ) expect_equal( dt %>% group_by(x) %>% count() %>% collect(), tibble(x = c(1, 2), n = c(3, 1)) ) }) test_that("can control name", { dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT") expect_equal( dt %>% count(x, name = "y") %>% collect(), tibble(x = c(1, 2), y = c(3, 1)) ) expect_snapshot( dt %>% count(name = 10) %>% collect(), error = TRUE ) }) test_that("name can match existing group var", { dt <- data.table(a = 2) expect_equal( dt %>% group_by(a) %>% tally(name = 'a') %>% collect(), tibble(a = 1) ) expect_equal( dt %>% count(a, name = 'a') %>% collect(), tibble(a = 1) ) }) test_that("can weight", { dt <- lazy_dt(data.table(x = c(1, 1, 2), y = c(1, 2, 10)), "DT") expect_equal( dt %>% count(x, wt = y) %>% collect(), tibble(x = c(1, 2), n = c(3, 10)) ) }) test_that("can sort", { dt <- lazy_dt(data.table(x = c(1, 1, 2), y = c(1, 
2, 10)), "DT") expect_equal( dt %>% count(x, wt = y, sort = TRUE) %>% collect(), tibble(x = c(2, 1), n = c(10, 3)) ) }) test_that("tally works", { dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT") expect_equal( dt %>% group_by(x) %>% tally() %>% collect(), tibble(x = c(1, 2), n = c(3, 1)) ) }) test_that("informs if n column already present, unless overridden", { dt <- lazy_dt(data.frame(n = c(1, 1, 2, 2, 2))) expect_message(out <- count(dt, n), "already present") expect_named(as_tibble(out), c("n", "nn")) # not a good idea, but supported expect_message(out <- count(dt, n, name = "n"), NA) expect_named(as_tibble(out), "n") expect_message(out <- count(dt, n, name = "nn"), NA) expect_named(as_tibble(out), c("n", "nn")) dt <- lazy_dt(data.frame(n = c(1, 1, 2, 2, 2), nn = 1:5)) expect_message(out <- count(dt, n), "already present") expect_named(as_tibble(out), c("n", "nn")) expect_message(out <- count(dt, n, nn), "already present") expect_named(as_tibble(out), c("n", "nn", "nnn")) }) test_that("name must be string", { dt <- lazy_dt(data.frame(x = c(1, 2))) expect_error(count(dt, x, name = 1), "string") expect_error(count(dt, x, name = letters), "string") }) dtplyr/tests/testthat/test-step-join.R0000644000176200001440000002101114127361773017610 0ustar liggesuserstest_that("dt_sources captures all tables", { dt1 <- lazy_dt(data.frame(x = 1), "dt1") dt2 <- lazy_dt(data.frame(x = 2), "dt2") dt3 <- lazy_dt(data.frame(x = 3), "dt3") out <- dt1 %>% left_join(dt2, by = "x") %>% left_join(dt3, by = "x") expect_equal( dt_sources(out)[c("dt1", "dt2", "dt3")], list(dt1 = dt1$parent, dt2 = dt2$parent, dt3 = dt3$parent) ) }) test_that("joins captures locals from both parents", { dt1 <- lazy_dt(data.frame(x = 1)) %>% mutate(y = 1) %>% compute("D1") dt2 <- lazy_dt(data.frame(x = 1)) %>% mutate(z = 1) %>% compute("D2") expect_named(left_join(dt1, dt2, by = "x")$locals, c("D1", "D2")) expect_named(inner_join(dt1, dt2, by = "x")$locals, c("D1", "D2")) }) # dplyr verbs ------------------------------------------------------------- test_that("simple usage generates expected translation", { dt1 <- lazy_dt(tibble(x = 1, y = 2, a = 3), "dt1") dt2 <- lazy_dt(tibble(x = 1, y = 2, b = 4), "dt2") expect_equal( dt1 %>% left_join(dt2, by = "x") %>% show_query(), expr( setnames( setcolorder( dt2[dt1, on = .(x), allow.cartesian = TRUE], !!c(1L, 4L, 5L, 2L, 3L) ), !!c("i.y", "y"), !!c("y.x", "y.y") ) ) ) expect_equal( dt1 %>% right_join(dt2, by = "x") %>% show_query(), expr( setnames( dt1[dt2, on = .(x), allow.cartesian = TRUE], !!c("y", "i.y"), !!c("y.x", "y.y") ) ) ) expect_equal( dt1 %>% inner_join(dt2, by = "x") %>% show_query(), expr( setnames( dt1[dt2, on = .(x), nomatch = NULL, allow.cartesian = TRUE], !!c("y", "i.y"), !!c("y.x", "y.y") ) ) ) expect_equal( dt1 %>% full_join(dt2, by = "x") %>% show_query(), expr(merge(dt1, dt2, all = TRUE, by.x = "x", by.y = "x", allow.cartesian = TRUE)) ) expect_equal( dt1 %>% anti_join(dt2, by = "x") %>% show_query(), expr(dt1[!dt2, on = .(x)]) ) expect_equal( dt1 %>% semi_join(dt2, by = "x") %>% show_query(), expr(dt1[unique(dt1[dt2, which = TRUE, nomatch = NULL, on = .(x)])]) ) }) test_that("full_join produces correct names", { # data.table: use merge which simply appends the corresponding suffix # producing duplicates # dplyr: appends suffix until name is unique df1 <- tibble(a = "a", b = "b.x", b.x = "b.x.x.x") df2 <- tibble(a = "a", b = "b.y", b.x.x = "b.x.x") dt1 <- lazy_dt(df1, "dt1") dt2 <- lazy_dt(df2, "dt2") joined_dt <- full_join(dt1, dt2, by = "a") expected <- 
full_join(df1, df2, by = "a") %>% colnames() expect_equal( joined_dt %>% .$vars, expected ) # suppress warning created by `data.table::merge()` expect_equal( suppressWarnings(joined_dt %>% collect() %>% colnames()), expected ) }) test_that("join can handle `by` where order doesn't match input", { dt1 <- lazy_dt(tibble(a = "a", b = "b", c = "c"), name = "dt1") dt2 <- lazy_dt(tibble(a = "a", b = "b", c = "c", d = "d"), name = "dt2") dt3 <- left_join(dt1, dt2, by = c("c", "b", "a")) expect_equal(dt3$vars, letters[1:4]) expect_equal(collect(dt3), collect(dt2)) dt4 <- full_join(dt1, dt2, by = c("c", "b", "a")) expect_equal(dt4$vars, letters[1:4]) expect_equal(collect(dt4), collect(dt2)) dt5 <- left_join(dt1, dt2, by = c("c", "b")) expect_equal( collect(dt5), tibble(a.x = "a", b = "b", c = "c", a.y = "a", d = "d") ) }) test_that("left_join produces correct names", { # data.table: uses y[x] which prefixes `x` vars with "i." and if name is not # unique it appends "." with the smallest number without a collision # dplyr: appends suffix until name is unique df1 <- tibble(a = "a", b = "b.x", i.b = "i.b") df2 <- tibble(a = "a", b = "b.y") dt1 <- lazy_dt(df1, "dt1") dt2 <- lazy_dt(df2, "dt2") joined_dt <- left_join(dt1, dt2, by = "a") expected <- left_join(df1, df2, by = "a") %>% colnames() expect_equal( joined_dt %>% .$vars, expected ) expect_equal( joined_dt %>% collect() %>% colnames(), expected ) }) test_that("named by converted to by.x and by.y", { dt1 <- lazy_dt(data.frame(a1 = 1:3, z = 1), "dt1") dt2 <- lazy_dt(data.frame(a2 = 1:3, z = 2), "dt2") out_inner <- inner_join(dt1, dt2, by = c('a1' = 'a2')) expect_equal( out_inner %>% show_query(), expr( setnames( dt1[dt2, on = .(a1 = a2), nomatch = NULL, allow.cartesian = TRUE], !!c("z", "i.z"), !!c("z.x", "z.y") ) ) ) expect_setequal(tbl_vars(out_inner), c("a1", "z.x", "z.y")) out_left <- left_join(dt1, dt2, by = c('a1' = 'a2')) expect_equal( out_left %>% show_query(), expr( setnames( setcolorder( dt2[dt1, on = .(a2 = a1), allow.cartesian = TRUE], !!c(1L, 3L, 2L) ), !!c("a2", "i.z", "z"), !!c("a1", "z.x", "z.y") ) ) ) expect_setequal(tbl_vars(out_left), c("a1", "z.x", "z.y")) }) test_that("named by can handle edge cases", { test_equal <- function(f_join) { joined_dt <- f_join(dt1, dt2, by = c("x", z = "y")) expected <- f_join(df1, df2, by = c("x", z = "y")) expect_equal( joined_dt %>% collect(), expected ) expect_equal( joined_dt$vars, colnames(expected) ) } df1 <- tibble(x = 1, y = 1, z = 2) df2 <- tibble(x = 1, y = 2) dt1 <- lazy_dt(df1, "dt1") dt2 <- lazy_dt(df2, "dt2") test_equal(left_join) test_equal(right_join) test_equal(full_join) test_equal(semi_join) test_equal(anti_join) }) test_that("setnames only used when necessary", { dt1 <- lazy_dt(data.frame(x = 1:2, a = 3), "dt1") dt2 <- lazy_dt(data.frame(x = 2:3, b = 4), "dt2") expect_equal( dt1 %>% left_join(dt2, by = "x") %>% show_query(), expr(setcolorder(dt2[dt1, on = .(x), allow.cartesian = TRUE], !!c(1L, 3L, 2L))) ) expect_equal( dt1 %>% left_join(dt2, by = "x") %>% pull(x), dt1 %>% pull(x) ) }) test_that("correctly determines vars", { dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 3), "dt1") dt2 <- lazy_dt(data.frame(x = 1, y = 2, b = 4), "dt2") expect_setequal( dt1 %>% left_join(dt2, by = c("x", "y")) %>% .$vars, c("x", "y", "a", "b") ) expect_setequal( dt1 %>% left_join(dt2, by = "x") %>% .$vars, c("x", "y.x", "y.y", "a", "b") ) expect_setequal( dt1 %>% semi_join(dt2, by = "x") %>% .$vars, c("x", "y", "a") ) }) test_that("can override suffixes", { dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 
3), "dt1") dt2 <- lazy_dt(data.frame(x = 1, y = 22, b = 4), "dt2") expect_equal( dt1 %>% left_join(dt2, by = "x", suffix = c("X", "Y")) %>% show_query(), expr( setnames( setcolorder( dt2[dt1, on = .(x), allow.cartesian = TRUE], !!c(1L, 4L, 5L, 2L, 3L) ), !!c("i.y", "y"), !!c("yX", "yY") ) ) ) }) test_that("automatically data.frame converts to lazy_dt", { dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 3), "dt1") df2 <- data.frame(x = 1, y = 2, a = 3) out <- left_join(dt1, df2, by = "x") expect_s3_class(out, "dtplyr_step") }) test_that("converts other types if requested", { dt1 <- lazy_dt(data.frame(x = 1, y = 2, a = 3), "dt1") x <- structure(10, class = "foo") expect_error(left_join(dt1, x, by = "x"), "copy") expect_s3_class(left_join(dt1, x, by = "x", copy = TRUE), "dtplyr_step_join") }) test_that("mutates inside joins are copied as needed", { dt <- data.table(x = 1) lhs <- lazy_dt(dt, "dt1") %>% mutate(y = x + 1) rhs <- lazy_dt(dt, "dt2") %>% mutate(z = x + 1) collect(inner_join(lhs, rhs, by = "x")) expect_named(dt, "x") }) test_that("performs cartesian joins as needed", { x <- lazy_dt(data.frame(x = c(2, 2, 2), y = 1:3)) y <- lazy_dt(data.frame(x = c(2, 2, 2), z = 1:3)) out <- collect(left_join(x, y, by = "x")) expect_equal(nrow(out), 9) }) test_that("performs cross join", { df1 <- data.frame(x = 1:2, y = "a", stringsAsFactors = FALSE) df2 <- data.frame(x = 3:4) dt1 <- lazy_dt(df1, "dt1") dt2 <- lazy_dt(df2, "dt2") expected <- left_join(df1, df2, by = character()) %>% as_tibble() expect_snapshot(left_join(dt1, dt2, by = character())) expect_equal(left_join(dt1, dt2, by = character()) %>% collect(), expected) expect_snapshot(right_join(dt1, dt2, by = character())) expect_equal(right_join(dt1, dt2, by = character()) %>% collect(), expected) expect_snapshot(full_join(dt1, dt2, by = character())) expect_equal(full_join(dt1, dt2, by = character()) %>% collect(), expected) expect_snapshot(inner_join(dt1, dt2, by = character())) expect_equal(inner_join(dt1, dt2, by = character()) %>% collect(), expected) }) dtplyr/tests/testthat/test-step-colorder-relocate.R0000644000176200001440000000526714021443044022256 0ustar liggesuserstest_that(".before and .after relocate individual cols", { dt <- lazy_dt(data.table(x = 1, y = 1), "DT") expect_equal( dt %>% relocate(x, .after = y) %>% show_query(), expr(setcolorder(copy(DT), !!c("y", "x"))) ) expect_equal( dt %>% relocate(y, .before = x) %>% show_query(), expr(setcolorder(copy(DT), !!c("y", "x"))) ) }) test_that("can move blocks of variables", { dt <- lazy_dt(data.table(x = 1, a = 1, y = 1, b = 1), "DT") expect_equal( dt %>% relocate(y, b, .before = a) %>% show_query(), expr(setcolorder(copy(DT), !!c("x", "y", "b", "a"))) ) expect_equal( dt %>% relocate(any_of(c("y", "b")), .before = a) %>% show_query(), expr(setcolorder(copy(DT), !!c("x", "y", "b", "a"))) ) }) test_that("All columns move before (after) columns in .before (.after)", { dt <- lazy_dt(data.table(x = 1, a = 1, y = 1, b = 1), "DT") expect_equal( dt %>% relocate(y, b, .before = c(x, a)) %>% show_query(), expr(setcolorder(copy(DT), !!c("y", "b", "x", "a"))) ) expect_equal( dt %>% relocate(x, a, .after = c(y, b)) %>% show_query(), expr(setcolorder(copy(DT), !!c("y", "b", "x", "a"))) ) }) test_that("extra variables in .before/.after unaffected", { dt <- lazy_dt(data.table(a = 1, b = 1, c = 1, d = 1, e = 1), "DT") expect_equal( dt %>% relocate(b, .after = c(a, c, e)) %>% show_query(), expr(setcolorder(copy(DT), !!c("a", "c", "d", "e", "b"))) ) expect_equal( dt %>% relocate(e, .before = c(b, d)) %>% 
show_query(), expr(setcolorder(copy(DT), !!c("a", "e", "b", "c", "d"))) ) }) test_that("no .before/.after moves to front", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% relocate(y) %>% show_query(), expr(setcolorder(copy(DT), !!c("y", "x"))) ) }) test_that("can only supply one of .before and .after", { dt <- lazy_dt(data.table(x = 1, y = 1), "DT") expect_error(relocate(dt, y, .before = x, .after = x), "only one") }) test_that("relocate() respects order specified by ...", { dt <- lazy_dt(data.table(a = 1, x = 1, b = 1, z = 1, y = 1), "DT") expect_equal( dt %>% relocate(x, y, z, .before = x) %>% show_query(), expr(setcolorder(copy(DT), !!c("a", "x", "y", "z", "b"))) ) expect_equal( dt %>% relocate(x, y, z, .after = last_col()) %>% show_query(), expr(setcolorder(copy(DT), !!c("a", "b", "x", "y", "z"))) ) expect_equal( dt %>% relocate(x, a, z) %>% show_query(), expr(setcolorder(copy(DT), !!c("x", "a", "z", "b", "y"))) ) }) test_that("relocate() only not alter grouping", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% group_by(x, y) %>% relocate(y, .before = x) %>% .$groups, c("x", "y") ) }) dtplyr/tests/testthat/test-step-mutate.R0000644000176200001440000000676014126601265020156 0ustar liggesuserstest_that("constructor has sensible defaults", { first <- step_first(data.table(x = 1), "DT") step <- step_mutate(first) expect_s3_class(step, "dtplyr_step_mutate") expect_equal(step$parent, first) expect_equal(step$vars, "x") expect_equal(step$groups, character()) expect_equal(step$new_vars, list()) }) # copies ------------------------------------------------------------------ test_that("need to copy when there's a mutate", { dt <- lazy_dt(data.table(x = 1)) expect_false(dt %>% .$needs_copy) expect_false(dt %>% filter(x == 1) %>% .$needs_copy) expect_false(dt %>% head() %>% .$needs_copy) expect_true(dt %>% mutate(y = 1) %>% .$needs_copy) expect_true(dt %>% mutate(y = 1) %>% filter(x == 1) %>% .$needs_copy) expect_true(dt %>% mutate(y = 1) %>% head() %>% .$needs_copy) }) test_that("unless there's already an implicit copy", { dt <- lazy_dt(data.table(x = 1)) expect_true(dt %>% filter(x == 1) %>% .$implicit_copy) expect_false(dt %>% filter(x == 1) %>% mutate(y = 1) %>% .$needs_copy) expect_true(dt %>% head() %>% .$implicit_copy) expect_false(dt %>% head() %>% mutate(y = 1) %>% .$needs_copy) }) # dplyr verbs ------------------------------------------------------------- test_that("generates single calls as expect", { dt <- lazy_dt(data.table(x = 1), "DT") expect_equal( dt %>% mutate(x2 = x * 2) %>% show_query(), expr(copy(DT)[, `:=`(x2 = x * 2)]) ) expect_equal( dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(), expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)]) ) expect_equal( dt %>% transmute(x2 = x * 2) %>% show_query(), expr(DT[, .(x2 = x * 2)]) ) }) test_that("mutate generates compound expression if needed", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% mutate(x2 = x * 2, x4 = x2 * 2) %>% show_query(), expr(copy(DT)[, c("x2", "x4") := { x2 <- x * 2 x4 <- x2 * 2 .(x2, x4) }]) ) }) test_that("allows multiple assignment to the same variable", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% mutate(x = x * 2, x = x * 2) %>% show_query(), expr(copy(DT)[, c("x") := { x <- x * 2 x <- x * 2 .(x) }]) ) }) test_that("can use across", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% mutate(across(everything(), ~ . 
+ 1)) %>% show_query(), expr(copy(DT)[, `:=`(x = x + 1, y = y + 1)]) ) expect_equal( dt %>% mutate(across(.fns = ~ . + 1)) %>% show_query(), expr(copy(DT)[, `:=`(x = x + 1, y = y + 1)]) ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% mutate(z = 1) %>% .$vars, c("x", "y", "z")) expect_equal(dt %>% mutate(x = NULL, z = 1) %>% .$vars, c("y", "z")) }) test_that("emtpy mutate returns input", { dt <- lazy_dt(data.frame(x = 1)) expect_equal(mutate(dt), dt) expect_equal(mutate(dt, !!!list()), dt) }) # .before and .after ----------------------------------------------------------- test_that("can use .before and .after to control column position", { dt <- lazy_dt(data.frame(x = 1, y = 2)) expect_named( mutate(dt, z = 1) %>% as_tibble(), c("x", "y", "z") ) expect_named( mutate(dt, z = 1, .before = x) %>% as_tibble(), c("z", "x", "y") ) expect_named( mutate(dt, z = 1, .after = x) %>% as_tibble(), c("x", "z", "y") ) # but doesn't affect order of existing columns expect_named( mutate(dt, x = 1, .after = y) %>% as_tibble(), c("x", "y") ) }) dtplyr/tests/testthat/test-step-modify.R0000644000176200001440000000113614006775461020145 0ustar liggesuserstest_that("group_modify creates modified data frame", { dt <- lazy_dt(data.table(g = c(1, 1, 2), x = 1:3)) foo <- function(rows, g) { list(nc = ncol(rows), nr = nrow(rows)) } out <- dt %>% group_by(g) %>% group_modify(foo) %>% collect() expect_equal(out$nc, c(1, 1)) expect_equal(out$nr, c(2, 1)) }) test_that("group_map works", { dt <- lazy_dt(data.table(g = c(1, 1, 2), x = 1:3)) out <- dt %>% group_by(g) %>% group_map(~ nrow(.)) expect_equal(out, list(2, 1)) # don't include group data out <- dt %>% group_by(g) %>% group_map(~ ncol(.)) expect_equal(out, list(1, 1)) }) dtplyr/tests/testthat/test-step-call-pivot_wider.R0000644000176200001440000001620514126601265022116 0ustar liggesuserstest_that("can pivot all cols to wide", { df <- lazy_dt(tibble(key = c("x", "y", "z"), val = 1:3), "DT") step <- pivot_wider(df, names_from = key, values_from = val) pv <- collect(step) expect_equal(step$vars, c("x", "y", "z")) expect_equal(nrow(pv), 1) expect_equal( show_query(step), expr(dcast(DT, formula = "..." 
~ key, value.var = "val")[, .(x, y, z)]) ) }) test_that("non-pivoted cols are preserved", { df <- lazy_dt(tibble(a = 1, key = c("x", "y"), val = 1:2), "DT") step <- pivot_wider(df, names_from = key, values_from = val) pv <- collect(step) expect_equal(step$vars, c("a", "x", "y")) expect_equal(nrow(pv), 1) expect_equal( show_query(step), expr(dcast(DT, formula = a ~ key, value.var = "val")) ) }) test_that("implicit missings turn into explicit missings", { df <- lazy_dt(tibble(a = 1:2, key = c("x", "y"), val = 1:2)) pv <- collect(pivot_wider(df, names_from = key, values_from = val)) expect_equal(pv$a, c(1, 2)) expect_equal(pv$x, c(1, NA)) expect_equal(pv$y, c(NA, 2)) }) test_that("error when overwriting existing column", { df <- tibble( a = c(1, 1), key = c("a", "b"), val = c(1, 2) ) df <- lazy_dt(df) expect_error( pivot_wider(df, names_from = key, values_from = val), "Names must be unique" ) }) test_that("grouping is preserved", { df <- lazy_dt(tibble(g = 1, k = "x", v = 2)) out <- df %>% group_by(g) %>% pivot_wider(names_from = k, values_from = v) expect_equal(out$groups, "g") }) # https://github.com/tidyverse/tidyr/issues/804 test_that("column with `...j` name can be used as `names_from`", { df <- lazy_dt(tibble(...8 = c("x", "y", "z"), val = 1:3)) step <- pivot_wider(df, names_from = ...8, values_from = val) pv <- collect(step) expect_equal(step$vars, c("x", "y", "z")) expect_equal(nrow(pv), 1) }) # column names ------------------------------------------------------------- test_that("names_glue affects output names & auto-converts data.table to lazy_dt", { df <- lazy_dt( data.frame( x = c("X", "Y"), y = 1:2, a = 1:2, b = 1:2 ), "DT" ) step <- pivot_wider(df, names_from = x:y, values_from = a:b, names_glue = "{x}{y}_{.value}") expect_snapshot(show_query(step)) expect_equal(step$vars, c("X1_a", "Y2_a", "X1_b", "Y2_b")) }) test_that("can use names_glue without .value", { df <- lazy_dt(tibble(label = c("x", "y", "z"), val = 1:3)) step <- pivot_wider( df, names_from = label, values_from = val, names_glue = "test_{label}" ) pv <- collect(step) expect_equal(step$vars, c("test_x", "test_y", "test_z")) expect_equal(nrow(pv), 1) }) test_that("can add name prefix", { df <- lazy_dt(tibble(label = c("x", "y", "z"), val = 1:3), "DT") step <- pivot_wider( df, names_from = label, values_from = val, names_prefix = "test_" ) expect_named(collect(step), c("test_x", "test_y", "test_z")) }) test_that("can sort column names", { df <- tibble( int = c(1, 3, 2), chr = c("Wed", "Tue", "Mon"), ) df <- lazy_dt(df, "DT") step <- pivot_wider(df, names_from = chr, values_from = int, names_sort = TRUE) expect_snapshot(show_query(step)) expect_equal(step$vars, c("Mon", "Tue", "Wed")) }) test_that("can sort column names with id", { df <- tibble( id = 1:3, int = c(1, 3, 2), chr = c("Wed", "Tue", "Mon"), ) df <- lazy_dt(df, "DT") step <- pivot_wider(df, names_from = chr, values_from = int, names_sort = TRUE) expect_snapshot(show_query(step)) expect_equal(step$vars, c("id", "Mon", "Tue", "Wed")) }) test_that("can repair names if requested", { df <- lazy_dt(tibble(x = 1, lab = "x", val = 2), "DT") expect_snapshot(error = TRUE, { pivot_wider(df, names_from = lab, values_from = val) pivot_wider(df, names_from = lab, values_from = val, names_repair = "unique") }) }) test_that("can handle numeric column in names_from", { df <- lazy_dt(tibble(x = 1, name = 1, value = 2), "DT") expect_named(pivot_wider(df, names_prefix = "nm") %>% collect(), c("x", "nm1")) }) # keys --------------------------------------------------------- 
test_that("can override default keys", { df <- tribble( ~row, ~name, ~var, ~value, 1, "Sam", "age", 10, 2, "Sam", "height", 1.5, 3, "Bob", "age", 20, ) df <- lazy_dt(df, "DT") step <- pivot_wider(df, id_cols = name, names_from = var, values_from = value) pv <- collect(step) expect_equal(nrow(pv), 2) expect_equal( show_query(step), expr(dcast(DT, formula = name ~ var, value.var = "value")) ) }) # non-unique keys --------------------------------------------------------- test_that("warning suppressed by supplying values_fn", { df <- lazy_dt(tibble(a = c(1, 1, 2), key = c("x", "x", "x"), val = 1:3)) pv <- df %>% pivot_wider(names_from = key, values_from = val, values_fn = list(val = list)) %>% collect() expect_equal(pv$a, c(1, 2)) expect_equal(as.list(pv$x), list(c(1L, 2L), 3L)) }) test_that("values_fn can be a single function", { df <- lazy_dt(tibble(a = c(1, 1, 2), key = c("x", "x", "x"), val = c(1, 10, 100)), "DT") step <- pivot_wider(df, names_from = key, values_from = val, values_fn = sum) pv <- collect(step) expect_equal(step$vars, c("a", "x")) expect_equal(pv$x, c(11, 100)) }) test_that("values_summarize applied even when no-duplicates", { df <- lazy_dt(tibble(a = c(1, 2), key = c("x", "x"), val = 1:2)) pv <- df %>% pivot_wider(names_from = key, values_from = val, values_fn = list(val = list)) %>% collect() expect_equal(pv$a, c(1, 2)) expect_equal(as.list(pv$x), list(1L, 2L)) }) # can fill missing cells -------------------------------------------------- test_that("can fill in missing cells", { df <- lazy_dt(tibble(g = c(1, 2), var = c("x", "y"), val = c(1, 2))) widen <- function(...) { df %>% pivot_wider(names_from = var, values_from = val, ...) %>% collect() } expect_equal(widen()$x, c(1, NA)) expect_equal(widen(values_fill = 0)$x, c(1, 0)) expect_equal(widen(values_fill = list(val = 0))$x, c(1, 0)) }) test_that("values_fill only affects missing cells", { df <- lazy_dt(tibble(g = c(1, 2), names = c("x", "y"), value = c(1, NA)), "DT") step <- pivot_wider(df, names_from = names, values_from = value, values_fill = 0) out <- collect(step) expect_equal(out$y, c(0, NA)) expect_equal( show_query(step), expr(dcast(DT, formula = g ~ names, value.var = "value", fill = 0)) ) }) # multiple values ---------------------------------------------------------- test_that("can pivot from multiple measure cols", { df <- lazy_dt(tibble(row = 1, var = c("x", "y"), a = 1:2, b = 3:4)) step <- pivot_wider(df, names_from = var, values_from = c(a, b)) pv <- collect(step) expect_equal(step$vars, c("row", "a_x", "a_y", "b_x", "b_y")) expect_equal(pv$a_x, 1) expect_equal(pv$b_y, 4) }) test_that("can pivot from multiple measure cols using all keys", { df <- lazy_dt(tibble(var = c("x", "y"), a = 1:2, b = 3:4)) step <- pivot_wider(df, names_from = var, values_from = c(a, b)) pv <- collect(step) expect_equal(step$vars, c("a_x", "a_y", "b_x", "b_y")) expect_equal(pv$a_x, 1) expect_equal(pv$b_y, 4) }) dtplyr/tests/testthat/test-step-subset-summarise.R0000644000176200001440000000705314127145637022172 0ustar liggesuserstest_that("simple calls generate expected translations", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% summarise(x = mean(x)) %>% show_query(), expr(DT[, .(x = mean(x))]) ) expect_equal( dt %>% transmute(x) %>% show_query(), expr(DT[, .(x = x)]) ) }) test_that("can use with across", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% summarise(across(x:y, mean)) %>% show_query(), expr(DT[, .(x = mean(x), y = mean(y))]) ) }) test_that("can merge iff 
j-generating call comes after i", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% filter(x > 1) %>% summarise(y = mean(x)) %>% show_query(), expr(DT[x > 1, .(y = mean(x))]) ) expect_equal( dt %>% summarise(y = mean(x)) %>% filter(y > 1) %>% show_query(), expr(DT[, .(y = mean(x))][y > 1]) ) }) test_that("summarise peels off layer of grouping", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1)) gt <- group_by(dt, x, y) expect_equal(summarise(gt)$groups, "x") expect_equal(summarise(summarise(gt))$groups, character()) }) test_that("summarises sorts groups", { dt <- lazy_dt(data.table(x = 2:1)) expect_equal( dt %>% group_by(x) %>% summarise(n = n()) %>% pull(x), 1:2 ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% summarise(z = mean(x)) %>% .$vars, "z") expect_equal(dt %>% group_by(y) %>% summarise(z = mean(x)) %>% .$vars, c("y", "z")) }) test_that("empty summarise returns unique groups", { dt <- lazy_dt(data.table(x = c(1, 1, 2), y = 1, z = 1), "DT") expect_equal( dt %>% group_by(x) %>% summarise() %>% show_query(), expr(unique(DT[, .(x)])) ) # If no groups, return null data.table expect_equal( dt %>% summarise() %>% show_query(), expr(DT[, 0L]) ) }) test_that("if for unsupported resummarise", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_error(dt %>% summarise(x = mean(x), x2 = sd(x)), "mutate") }) test_that("summarise(.groups=)", { # the `dplyr::` prefix is needed for `check()` # should produce a message when called directly by user expect_message(eval_bare( expr(lazy_dt(data.frame(x = 1, y = 2), "DT") %>% group_by(x, y) %>% dplyr::summarise() %>% show_query()), env(global_env()) )) skip_if(utils::packageVersion("rlang") < "0.5.0") expect_snapshot(eval_bare( expr(lazy_dt(data.frame(x = 1, y = 2), "DT") %>% group_by(x, y) %>% dplyr::summarise() %>% show_query()), env(global_env()) )) # should be silent when called in another package expect_silent(eval_bare( expr(lazy_dt(data.frame(x = 1, y = 2), "DT") %>% group_by(x, y) %>% dplyr::summarise() %>% show_query()), asNamespace("testthat") )) df <- lazy_dt(data.table(x = 1, y = 2), "DT") %>% group_by(x, y) expect_equal(df %>% summarise() %>% group_vars(), "x") expect_equal(df %>% summarise(.groups = "drop_last") %>% group_vars(), "x") expect_equal(df %>% summarise(.groups = "drop") %>% group_vars(), character()) expect_equal(df %>% summarise(.groups = "keep") %>% group_vars(), c("x", "y")) expect_snapshot_error(df %>% summarise(.groups = "rowwise")) }) test_that("can change group vars", { dt <- lazy_dt(data.frame(a = 1), "DT") %>% group_by(a) res <- dt %>% summarise(a = 2) expect_equal( show_query(res), expr(DT[, .(a = 2), keyby = .(a)][, `:=`("a", NULL)]) ) expect_equal( as_tibble(res), tibble(a = 2) ) # but not with across expect_error( dt %>% summarise(across(a, ~ 2)), "Column `a` doesn't exist" ) }) dtplyr/tests/testthat/test-step-call.R0000644000176200001440000001235014126601265017562 0ustar liggesusers # head and tail ------------------------------------------------------------- test_that("simple calls generate expected results", { dt <- lazy_dt(data.table(x = 1), "DT") expect_equal( dt %>% head() %>% show_query(), expr(head(DT, n = 6L)) ) expect_equal( dt %>% tail() %>% show_query(), expr(tail(DT, n = 6L)) ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% head() %>% .$vars, c("x", "y")) }) # rename ------------------------------------------------------------------ test_that("simple calls generate 
expected translations", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% rename(b = y) %>% show_query(), expr(setnames(copy(DT), "y", "b")) ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% rename(a = x) %>% .$vars, c("a", "y")) }) test_that("empty rename returns original", { dt <- data.table(x = 1, y = 1, z = 1) lz <- lazy_dt(dt, "DT") expect_equal(lz %>% rename() %>% show_query(), expr(DT)) }) test_that("renames grouping vars", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1)) gt <- group_by(dt, x) expect_equal(rename(gt, a = x)$groups, "a") }) test_that("can rename with a function or formula", { dt <- lazy_dt(data.table(x = 1, y = 1)) expect_equal(dt %>% rename_with(toupper) %>% .$vars, c("X", "Y")) expect_equal(dt %>% rename_with(toupper, 1) %>% .$vars, c("X", "y")) expect_equal(dt %>% rename_with("toupper") %>% .$vars, c("X", "Y")) expect_equal(dt %>% rename_with(~ toupper(.x)) %>% .$vars, c("X", "Y")) }) test_that("but not with anything else", { dt <- lazy_dt(data.table(x = 1, y = 1)) expect_snapshot(error = TRUE, { dt %>% rename_with(1) }) }) test_that("rename_with generates minimal spec", { dt <- lazy_dt(matrix(ncol = 26, dimnames = list(NULL, letters)), "DT") expect_snapshot({ dt %>% rename_with(toupper) %>% show_query() dt %>% rename_with(toupper, 1:3) %>% show_query() }) }) test_that("can rename_with() a data.table", { dt <- data.table(x = 1:5, y = 1:5) out <- rename_with(dt, toupper, x) expect_s3_class(out, "dtplyr_step") expect_named(as_tibble(out), c("X", "y")) }) # distinct ---------------------------------------------------------------- test_that("no input uses all variables", { dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt") expect_equal( dt %>% distinct() %>% show_query(), expr(unique(dt)) ) expect_equal(dt %>% distinct() %>% .$vars, c("x", "y")) }) test_that("uses supplied variables", { dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt") expect_equal( dt %>% distinct(y) %>% show_query(), expr(unique(dt[, .(y)])) ) expect_equal(dt %>% distinct(y) %>% .$vars, "y") expect_equal( dt %>% group_by(x) %>% distinct(x, y) %>% show_query(), expr(unique(dt)) ) }) test_that("doesn't duplicate variables", { dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt") expect_equal( dt %>% distinct(x, x) %>% show_query(), expr(unique(dt[, .(x)])) ) expect_equal(dt %>% distinct(x, x) %>% .$vars, "x") expect_equal( dt %>% group_by(x) %>% distinct(x) %>% show_query(), expr(unique(dt[, .(x)])) ) }) test_that("keeps all variables if requested", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "dt") expect_equal( dt %>% distinct(y, .keep_all = TRUE) %>% show_query(), expr(unique(dt, by = "y")) ) expect_equal(dt %>% distinct(y, .keep_all = TRUE) %>% .$vars, c("x", "y", "z")) expect_equal( dt %>% group_by(x) %>% distinct(y, .keep_all = TRUE) %>% show_query(), expr(unique(dt, by = !!c("x", "y"))) ) }) test_that("can compute distinct computed variables", { dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt") expect_equal( dt %>% distinct(z = x + y) %>% show_query(), expr(unique(dt[, .(z = x + y)])) ) expect_equal( dt %>% distinct(z = x + y, .keep_all = TRUE) %>% show_query(), expr(unique(copy(dt)[, `:=`(z = x + y)], by = "z")) ) }) # unique ------------------------------------------------------------------ test_that("unique is an alias for distinct", { dt <- lazy_dt(data.table(x = c(1, 1))) expect_equal(unique(dt), distinct(dt)) }) # drop_na 
------------------------------------------------------------------ test_that("empty call drops every row", { tb <- tibble(x = c(1, 2, NA), y = c("a", NA, "b")) step <- drop_na(lazy_dt(tb, "DT")) expect_equal(show_query(step), expr(na.omit(DT))) expect_equal(as_tibble(step), tb[1, ]) }) test_that("uses specified variables", { df <- tibble(x = c(1, 2, NA), y = c("a", NA, "b")) dt <- lazy_dt(df, "DT") step <- drop_na(dt, x) expect_equal(show_query(step), expr(na.omit(DT, cols = "x"))) expect_equal(collect(step), df[1:2, ]) step <- drop_na(dt, x:y) expect_equal(show_query(step), expr(na.omit(DT, cols = !!c("x", "y")))) expect_equal(collect(step), df[1, ]) }) test_that("errors are raised", { tb <- tibble(x = c(1, 2, NA), y = c("a", NA, "b")) dt <- lazy_dt(tb, "DT") expect_snapshot(collect(drop_na(dt, "z")), error = TRUE) }) test_that("converts data.table to dtplyr_step", { df <- data.table(x = c(1, 2, NA), y = c("a", NA, "b")) expect_s3_class(drop_na(df), "dtplyr_step_call") }) dtplyr/tests/testthat/test-step-subset-select.R0000644000176200001440000000273414127145637021445 0ustar liggesuserstest_that("can select variables", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% select(-z) %>% show_query(), expr(DT[, .(x, y)]) ) expect_equal( dt %>% select(a = x, y) %>% show_query(), expr(DT[, .(a = x, y)]) ) }) test_that("can merge iff j-generating call comes after i", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% filter(x > 1) %>% select(y) %>% show_query(), expr(DT[x > 1, .(y)]) ) expect_equal( dt %>% select(x = y) %>% filter(x > 1) %>% show_query(), expr(DT[, .(x = y)][x > 1]) ) }) test_that("renames grouping vars", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1)) gt <- group_by(dt, x) expect_equal(select(gt, y = x)$groups, "y") }) test_that("empty select returns no columns", { dt <- data.table(x = 1, y = 1, z = 1) lz <- lazy_dt(dt, "DT") expect_equal( lz %>% select() %>% collect(), tibble() ) # unless it's grouped skip_if(utils::packageVersion("rlang") < "0.5.0") expect_snapshot(out <- lz %>% group_by(x) %>% select()) expect_equal( out %>% collect(), group_by(tibble(x = 1), x) ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% select(a = x, y) %>% .$vars, c("a", "y")) }) test_that("only add step if necessary", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") expect_equal(dt %>% select(everything()), dt) expect_equal(dt %>% select(x, y), dt) }) dtplyr/tests/testthat/test-step-call-pivot_longer.R0000644000176200001440000000765714031070705022277 0ustar liggesuserstest_that("can pivot all cols to long", { tbl <- tibble(x = 1:2, y = 3:4) dt <- lazy_dt(tbl, "DT") step <- pivot_longer(dt, x:y) out <- collect(step) expect_equal( show_query(step), expr(melt(DT, measure.vars = !!c("x", "y"), variable.name = "name", variable.factor = FALSE)) ) expect_equal(step$vars, c("name", "value")) expect_equal(out$name, c("x", "x", "y", "y")) expect_equal(out$value, c(1, 2, 3, 4)) }) test_that("preserves original keys", { tbl <- tibble(x = 1:2, y = 2L, z = 1:2) dt <- lazy_dt(tbl, "DT") step <- pivot_longer(dt, y:z) out <- collect(step) expect_equal( show_query(step), expr(melt(DT, measure.vars = !!c("y", "z"), variable.name = "name", variable.factor = FALSE)) ) expect_equal(step$vars, c("x", "name", "value")) expect_equal(out$x, rep(tbl$x, 2)) }) test_that("can drop missing values", { tbl <- tibble(x = c(1, NA), y = c(NA, 2)) dt <- lazy_dt(tbl, "DT") step <- pivot_longer(dt, x:y, values_drop_na = TRUE) out 
<- collect(step) expect_equal( show_query(step), expr(melt(DT, measure.vars = !!c("x", "y"), variable.name = "name", na.rm = TRUE, variable.factor = FALSE)) ) expect_equal(out$name, c("x", "y")) expect_equal(out$value, c(1, 2)) }) test_that("can pivot to multiple measure cols", { dt <- lazy_dt(head(anscombe, 2), "DT") step <- pivot_longer( dt, everything(), names_to = c(".value", "set"), names_pattern = "(.)(.)" ) out <- collect(step) expect_snapshot(show_query(step)) expect_equal(step$vars, c("set", "x", "y")) }) test_that(".value can be at any position in `names_to`", { samp1 <- tibble( i = 1:4, y_t1 = rnorm(4), y_t2 = rnorm(4), z_t1 = rep(3, 4), z_t2 = rep(-2, 4), ) dt1 <- lazy_dt(samp1, "DT1") value_first <- dt1 %>% pivot_longer(-i, names_to = c(".value", "time"), names_sep = "_") %>% collect() samp2 <- dplyr::rename(samp1, t1_y = y_t1, t2_y = y_t2, t1_z = z_t1, t2_z = z_t2) dt2 <- lazy_dt(samp2, "DT2") value_second <- dt2 %>% pivot_longer(-i, names_to = c("time", ".value"), names_sep = "_") %>% collect() expect_identical(value_first, value_second) }) test_that("errors on unbalanced datasets", { tbl <- tibble(x_1 = 1, x_2 = 1, y_3 = 1, y_4 = 1) dt <- lazy_dt(tbl, "DT") expect_snapshot(error = TRUE, pivot_longer(dt, everything(), names_to = c(".value", "id"), names_sep = "_") ) }) test_that("can use names_prefix", { tbl <- tibble(x_x = 1:2, x_y = 3:4) dt <- lazy_dt(tbl, "DT") out <- dt %>% pivot_longer(everything(), names_prefix = "x_") %>% arrange(name, value) %>% collect() expect_equal(out$name, c("x","x","y","y")) expect_equal(out$value, c(1,2,3,4)) }) test_that("can use names_pattern w/out .value in names_to", { dt <- data.table(a1_1 = 1, b2_2 = 2) out <- dt %>% pivot_longer( cols = everything(), names_to = c("a", "b"), names_pattern = "([[:alnum:]]+)_([[:alnum:]]+)" ) %>% collect() expect_named(out, c("a", "b", "value")) expect_equal(out$a, c("a1", "b2")) expect_equal(out$b, c("1", "2")) expect_equal(out$value, c(1, 2)) }) test_that("can use names_sep w/out .value in names_to", { dt <- data.table(a1_1 = 1, b2_2 = 2) out <- dt %>% pivot_longer( cols = everything(), names_to = c("a", "b"), names_sep = "_" ) %>% collect() expect_named(out, c("a", "b", "value")) expect_equal(out$a, c("a1", "b2")) expect_equal(out$b, c("1", "2")) expect_equal(out$value, c(1, 2)) }) test_that("informative errors on unsupported features", { dt <- data.table(a1_1 = 1, b2_2 = 2) expect_snapshot(error = TRUE, { dt %>% pivot_longer(names_ptypes = list()) dt %>% pivot_longer(names_transform = list()) dt %>% pivot_longer(values_ptypes = list()) dt %>% pivot_longer(values_transform = list()) }) }) dtplyr/tests/testthat/_snaps/0000755000176200001440000000000014150760302016053 5ustar liggesusersdtplyr/tests/testthat/_snaps/step-subset-filter.md0000644000176200001440000000075414150757752022163 0ustar liggesusers# errors for named input Code filter(dt, x = 1) Error Problem with `filter()` input `..1`. x Input `..1` is named. i This usually means that you've used `=` instead of `==`. i Did you mean `x == 1`? --- Code filter(dt, y > 1, x = 1) Error Problem with `filter()` input `..2`. x Input `..2` is named. i This usually means that you've used `=` instead of `==`. i Did you mean `x == 1`? dtplyr/tests/testthat/_snaps/step-subset-summarise.md0000644000176200001440000000070714150757753022702 0ustar liggesusers# summarise(.groups=) Code eval_bare(expr(lazy_dt(data.frame(x = 1, y = 2), "DT") %>% group_by(x, y) %>% dplyr::summarise() %>% show_query()), env(global_env())) Message `summarise()` has grouped output by 'x'. 
You can override using the `.groups` argument. Output unique(DT) --- `.groups` can't be "rowwise" in dtplyr i Possible values are NULL (default), "drop_last", "drop", and "keep" dtplyr/tests/testthat/_snaps/tidyeval-across.md0000644000176200001440000000056714150757753021536 0ustar liggesusers# across() gives informative errors Code capture_across(dt, across(a, 1)) Error `.fns` argument to dtplyr::across() must be a NULL, a function, formula, or list Code capture_across(dt, across(a, list(1))) Error .fns argument to dtplyr::across() must contain a function or a formula x Problem with 1 dtplyr/tests/testthat/_snaps/step-join.md0000644000176200001440000000465014150757751020330 0ustar liggesusers# performs cross join Code left_join(dt1, dt2, by = character()) Output Source: local data table [4 x 3] Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] x.x y x.y 1 1 a 3 2 1 a 4 3 2 a 3 4 2 a 4 # Use as.data.table()/as.data.frame()/as_tibble() to access results --- Code right_join(dt1, dt2, by = character()) Output Source: local data table [4 x 3] Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] x.x y x.y 1 1 a 3 2 1 a 4 3 2 a 3 4 2 a 4 # Use as.data.table()/as.data.frame()/as_tibble() to access results --- Code full_join(dt1, dt2, by = character()) Output Source: local data table [4 x 3] Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] x.x y x.y 1 1 a 3 2 1 a 4 3 2 a 3 4 2 a 4 # Use as.data.table()/as.data.frame()/as_tibble() to access results --- Code inner_join(dt1, dt2, by = character()) Output Source: local data table [4 x 3] Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] x.x y x.y 1 1 a 3 2 1 a 4 3 2 a 3 4 2 a 4 # Use as.data.table()/as.data.frame()/as_tibble() to access results dtplyr/tests/testthat/_snaps/count.md0000644000176200001440000000020014150757747017540 0ustar liggesusers# can control name Code dt %>% count(name = 10) %>% collect() Error `name` must be a string dtplyr/tests/testthat/_snaps/step-call.md0000644000176200001440000000111714150757750020276 0ustar liggesusers# but not with anything else Code dt %>% rename_with(1) Error `.fn` must be a function name or formula # rename_with generates minimal spec Code dt %>% rename_with(toupper) %>% show_query() Output setnames(copy(DT), toupper) Code dt %>% rename_with(toupper, 1:3) %>% show_query() Output setnames(copy(DT), c("a", "b", "c"), toupper) # errors are raised Code collect(drop_na(dt, "z")) Error Can't subset columns that don't exist. x Column `z` doesn't exist. dtplyr/tests/testthat/_snaps/step-group.md0000644000176200001440000000040614150757751020520 0ustar liggesusers# can add groups if requested Code . <- dt %>% group_by(x) %>% group_by(y, add = TRUE) Warning The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0. Please use the `.add` argument instead. 
dtplyr/tests/testthat/_snaps/step.md0000644000176200001440000000510714150757753017373 0ustar liggesusers# has useful display methods Code dt <- lazy_dt(mtcars, "DT") dt Output Source: local data table [32 x 11] Call: DT mpg cyl disp hp drat wt qsec vs am gear carb 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 # ... with 26 more rows # Use as.data.table()/as.data.frame()/as_tibble() to access results Code dt %>% group_by(vs, am) Output Source: local data table [32 x 11] Groups: vs, am Call: DT mpg cyl disp hp drat wt qsec vs am gear carb 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 # ... with 26 more rows # Use as.data.table()/as.data.frame()/as_tibble() to access results Code dt %>% mutate(y = 10) %>% compute("DT2") Output Source: local data table [32 x 12] Call: DT2 <- copy(DT)[, `:=`(y = 10)] DT2 mpg cyl disp hp drat wt qsec vs am gear carb y 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 10 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 10 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 10 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 10 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 10 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 10 # ... with 26 more rows # Use as.data.table()/as.data.frame()/as_tibble() to access results dtplyr/tests/testthat/_snaps/step-subset-separate.md0000644000176200001440000000036414150757752022477 0ustar liggesusers# checks type of `into` and `sep` Code separate(dt, x, "x", FALSE) Error is.character(sep) is not TRUE --- Code separate(dt, x, FALSE) Error is.character(into) is not TRUE dtplyr/tests/testthat/_snaps/step-call-pivot_wider.md0000644000176200001440000000256214150757750022634 0ustar liggesusers# names_glue affects output names & auto-converts data.table to lazy_dt Code show_query(step) Output setnames(dcast(DT, formula = "..." ~ x + y, value.var = c("a", "b"))[, .(a_X_1, a_Y_2, b_X_1, b_Y_2)], c("a_X_1", "a_Y_2", "b_X_1", "b_Y_2"), c("X1_a", "Y2_a", "X1_b", "Y2_b")) # can sort column names Code show_query(step) Output dcast(DT, formula = "..." ~ chr, value.var = "int")[, .(Mon, Tue, Wed)] # can sort column names with id Code show_query(step) Output setcolorder(dcast(DT, formula = id ~ chr, value.var = "int"), c("id", "Mon", "Tue", "Wed")) # can repair names if requested Code pivot_wider(df, names_from = lab, values_from = val) Error Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. 
Code pivot_wider(df, names_from = lab, values_from = val, names_repair = "unique") Message New names: * x -> x...1 * x -> x...2 Output Source: local data table [1 x 2] Call: setnames(dcast(copy(DT), formula = x ~ lab, value.var = "val"), 1:2, c("x...1", "x...2")) x...1 x...2 1 1 2 # Use as.data.table()/as.data.frame()/as_tibble() to access results dtplyr/tests/testthat/_snaps/step-subset-select.md0000644000176200001440000000024314150757752022146 0ustar liggesusers# empty select returns no columns Code out <- lz %>% group_by(x) %>% select() Message Adding missing grouping variables: `x` dtplyr/tests/testthat/_snaps/tidyeval.md0000644000176200001440000000037014150757754020237 0ustar liggesusers# translates lag()/lead() The `order_by` argument of `lag()` is not supported by dtplyr # desc() checks the number of arguments Code capture_dot(df, desc(a, b)) Error `desc()` expects exactly one argument. dtplyr/tests/testthat/_snaps/step-colorder.md0000644000176200001440000000033614150757750021176 0ustar liggesusers# can handle duplicate column names The column(s) x do not uniquely match a column in `x`. # checks col_order Every element of `col_order` must be unique. --- Every element of `col_order` must be unique. dtplyr/tests/testthat/_snaps/step-subset-slice.md0000644000176200001440000000110414150760302021744 0ustar liggesusers# check_slice_catches common errors Code check_slice_size(n = 1, prop = 1) Error Must supply exactly one of `n` and `prop` arguments. Code check_slice_size(n = "a") Error `n` must be a single number. Code check_slice_size(prop = "a") Error `prop` must be a single number. Code check_slice_size(n = NA) Error `n` must be a single number. Code check_slice_size(prop = NA) Error `prop` must be a single number. dtplyr/tests/testthat/_snaps/step-call-pivot_longer.md0000644000176200001440000000216714150757747023017 0ustar liggesusers# can pivot to multiple measure cols Code show_query(step) Output melt(DT, measure.vars = list(c("x1", "x2", "x3", "x4"), c("y1", "y2", "y3", "y4")), variable.name = "set", value.name = c("x", "y"), variable.factor = FALSE)[, `:=`(set = c("1", "1", "2", "2", "3", "3", "4", "4"))] # errors on unbalanced datasets Code pivot_longer(dt, everything(), names_to = c(".value", "id"), names_sep = "_") Error `data.table::melt()` doesn't currently support melting of unbalanced datasets. 
# informative errors on unsupported features Code dt %>% pivot_longer(names_ptypes = list()) Error `names_ptypes` is not supported by dtplyr Code dt %>% pivot_longer(names_transform = list()) Error `names_transform` is not supported by dtplyr Code dt %>% pivot_longer(values_ptypes = list()) Error `values_ptypes` is not supported by dtplyr Code dt %>% pivot_longer(values_transform = list()) Error `values_transform` is not supported by dtplyr dtplyr/tests/testthat/test-step-nest.R0000644000176200001440000000517514126601265017627 0ustar liggesuserstest_that("nest turns grouped values into one list-df", { ldt <- lazy_dt(tibble(x = c(1, 1, 1), y = 1:3), "DT") out <- nest(ldt, data = y) outc <- collect(out) expect_equal(show_query(out), expr(DT[, .(data = .(.SD)), by = .(x)])) expect_equal(group_vars(out), character()) expect_equal(out$vars, c("x", "data")) expect_equal(outc$x, 1) expect_equal(length(outc$data), 1L) expect_equal(outc$data[[1L]], data.table(y = 1:3)) }) test_that("nest uses grouping vars if present", { ldt <- lazy_dt(tibble(x = c(1, 1, 1), y = 1:3), "DT") out <- nest(dplyr::group_by(ldt, x)) expect_equal(group_vars(out), "x") expect_equal(show_query(out), expr(DT[, .(data = .(.SD)), by = .(x)])) }) test_that("provided grouping vars override grouped defaults", { ldt <- tibble(x = 1, y = 2, z = 3) %>% group_by(x) %>% lazy_dt("DT") out <- nest(ldt, data = y) expect_equal(show_query(out), expr(DT[, .(data = .(.SD)), by = .(x, z)])) expect_equal(group_vars(out), "x") expect_equal(out$vars, c("x", "z", "data")) }) test_that("puts data into the correct row", { ldt <- tibble(x = 1:3, y = c("B", "A", "A")) %>% lazy_dt() out <- nest(ldt, data = x) %>% collect() %>% dplyr::filter(y == "B") expect_equal(out$data[[1]]$x, 1) }) test_that("nesting everything yields a simple data frame", { dt <- data.table(x = 1:3, y = c("B", "A", "A")) ldt <- lazy_dt(dt, "DT") out <- nest(ldt, data = c(x, y)) expect_equal(show_query(out), expr(DT[, .(data = .(.SD))])) expect_equal(out$vars, "data") expect_equal(collect(out)$data, list(dt)) }) test_that("nest preserves order of data", { ldt <- lazy_dt(tibble(x = c(1, 3, 2, 3, 2), y = 1:5), "DT") out <- nest(ldt, data = y) expect_equal(collect(out)$x, c(1, 3, 2)) }) test_that("can strip names", { ldt <- lazy_dt(tibble(x = c(1, 1, 1), ya = 1:3, yb = 4:6), "DT") out <- nest(ldt, y = starts_with("y"), .names_sep = "") expect_equal( show_query(out), expr(DT[, .(y = .(data.table(a = ya, b = yb))), by = .(x)]) ) expect_named(collect(out)$y[[1]], c("a", "b")) }) test_that("can nest multiple columns", { ldt <- lazy_dt(tibble(x = 1, a1 = 1, a2 = 2, b1 = 1, b2 = 2), "DT") out <- ldt %>% nest(a = c(a1, a2), b = c(b1, b2)) expect_equal( show_query(out), expr(DT[, .(a = .(data.table(a1, a2)), b = .(data.table(b1, b2))), by = .(x)]) ) expect_equal(out$vars, c("x", "a", "b")) }) test_that("nesting no columns nests all inputs", { # included only for backward compatibility ldt <- lazy_dt(tibble(a1 = 1, a2 = 2, b1 = 1, b2 = 2), "DT") expect_warning(out <- nest(ldt), "must not be empty") expect_equal(show_query(out), expr(DT[, .(data = .(.SD))])) }) dtplyr/tests/testthat/test-step-subset-slice.R0000644000176200001440000001717414172100560021253 0ustar liggesusers test_that("can slice", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% slice() %>% show_query(), expr(DT) ) expect_equal( dt %>% slice(c(1, 2)) %>% show_query(), expr(DT[c(1, 2)[between(c(1, 2), -.N, .N)]]) ) expect_equal( dt %>% slice(1, 2, 3) %>% show_query(), expr(DT[c(1, 2, 3)[between(c(1, 2, 3), 
-.N, .N)]]) ) }) test_that("can slice when grouped", { dt1 <- lazy_dt(data.table(x = c(1, 1, 2, 2), y = c(1, 2, 3, 4)), "DT") dt2 <- dt1 %>% group_by(x) %>% slice(1) expect_equal( dt2 %>% show_query(), expr(DT[DT[, .I[1[between(1, -.N, .N)]], by = .(x)]$V1]) ) expect_equal(as_tibble(dt2), tibble(x = c(1, 2), y = c(1, 3))) }) test_that("slicing doesn't sorts groups", { dt <- lazy_dt(data.table(x = 2:1)) expect_equal( dt %>% group_by(x) %>% slice(1) %>% pull(x), 2:1 ) }) test_that("doesn't return excess rows, #10", { dt <- lazy_dt(data.table(x = 1:2)) expect_equal( dt %>% slice(1:3) %>% pull(x), 1:2 ) }) # variants ---------------------------------------------------------------- test_that("functions silently truncate results", { dt <- lazy_dt(data.frame(x = 1:5)) expect_equal(dt %>% slice_head(n = 6) %>% as_tibble() %>% nrow(), 5) expect_equal(dt %>% slice_tail(n = 6) %>% as_tibble() %>% nrow(), 5) expect_equal(dt %>% slice_sample(n = 6) %>% as_tibble() %>% nrow(), 5) expect_equal(dt %>% slice_min(x, n = 6) %>% as_tibble() %>% nrow(), 5) expect_equal(dt %>% slice_max(x, n = 6) %>% as_tibble() %>% nrow(), 5) expect_equal(dt %>% slice_head(n = -6) %>% as_tibble() %>% nrow(), 0) expect_equal(dt %>% slice_tail(n = -6) %>% as_tibble() %>% nrow(), 0) expect_equal(dt %>% slice_sample(n = -6) %>% as_tibble() %>% nrow(), 0) expect_equal(dt %>% slice_min(x, n = -6) %>% as_tibble() %>% nrow(), 0) expect_equal(dt %>% slice_max(x, n = -6) %>% as_tibble() %>% nrow(), 0) }) test_that("proportion rounds down", { dt <- lazy_dt(data.frame(x = 1:10)) expect_equal(dt %>% slice_head(prop = 0.11) %>% as_tibble() %>% nrow(), 1) expect_equal(dt %>% slice_tail(prop = 0.11) %>% as_tibble() %>% nrow(), 1) expect_equal(dt %>% slice_sample(prop = 0.11) %>% as_tibble() %>% nrow(), 1) expect_equal(dt %>% slice_min(x, prop = 0.11) %>% as_tibble() %>% nrow(), 1) expect_equal(dt %>% slice_max(x, prop = 0.11) %>% as_tibble() %>% nrow(), 1) expect_equal(dt %>% slice_min(x, prop = 0.11, with_ties = FALSE) %>% as_tibble() %>% nrow(), 1) expect_equal(dt %>% slice_max(x, prop = 0.11, with_ties = FALSE) %>% as_tibble() %>% nrow(), 1) }) test_that("min and max return ties by default", { dt <- lazy_dt(data.frame(x = c(1, 1, 1, 2, 2))) expect_equal(dt %>% slice_min(x) %>% collect() %>% nrow(), 3) expect_equal(dt %>% slice_max(x) %>% collect() %>% nrow(), 2) expect_equal(dt %>% slice_min(x, with_ties = FALSE) %>% collect() %>% nrow(), 1) expect_equal(dt %>% slice_max(x, with_ties = FALSE) %>% collect() %>% nrow(), 1) }) test_that("min and max work with character", { dt <- lazy_dt(data.table(x = c("b", "a", "d", "c"))) expect_equal(dt %>% slice_min(x) %>% pull(x), "a") expect_equal(dt %>% slice_max(x) %>% pull(x), "d") }) test_that("min and max reorder results and auto-convert data.tables", { dt <- lazy_dt(data.frame(id = 1:4, x = c(2, 3, 1, 2))) expect_equal(dt %>% slice_min(x, n = 2) %>% pull(id), c(3, 1, 4)) expect_equal(dt %>% slice_min(x, n = 2, with_ties = FALSE) %>% pull(id), c(3, 1)) expect_equal(dt %>% slice_max(x, n = 2) %>% pull(id), c(2, 1, 4)) expect_equal(dt %>% slice_max(x, n = 2, with_ties = FALSE) %>% pull(id), c(2, 1)) dt <- data.table(id = 1:4, x = c(2, 3, 1, 2)) expect_equal(dt %>% slice_min(x, n = 2) %>% pull(id), c(3, 1, 4)) expect_equal(dt %>% slice_max(x, n = 2) %>% pull(id), c(2, 1, 4)) }) test_that("min and max ignore NA's (#4826)", { dt <- lazy_dt(data.frame(id = 1:4, x = c(2, NA, 1, 2), y = c(NA, NA, NA, NA))) expect_equal(dt %>% slice_min(x, n = 2) %>% pull(id), c(3, 1, 4)) expect_equal(dt %>% slice_min(y, n 
= 2) %>% pull(id), integer()) expect_equal(dt %>% slice_max(x, n = 2) %>% pull(id), c(1, 4)) expect_equal(dt %>% slice_max(y, n = 2) %>% pull(id), integer()) }) test_that("arguments to sample are passed along", { dt <- lazy_dt(data.frame(x = 1:100, wt = c(1, rep(0, 99)))) expect_equal(dt %>% slice_sample(n = 1, weight_by = wt) %>% pull(x), 1) expect_equal(dt %>% slice_sample(n = 2, weight_by = wt, replace = TRUE) %>% pull(x), c(1, 1)) }) test_that("slice_*() checks for empty ...", { dt <- lazy_dt(data.frame(x = 1:10)) expect_error(slice_head(dt, 5), class = "rlib_error_dots_nonempty") expect_error(slice_tail(dt, 5), class = "rlib_error_dots_nonempty") expect_error(slice_min(dt, x, 5), class = "rlib_error_dots_nonempty") expect_error(slice_max(dt, x, 5), class = "rlib_error_dots_nonempty") expect_error(slice_sample(dt, 5), class = "rlib_error_dots_nonempty") expect_error(slice_min(dt), "missing") expect_error(slice_max(dt), "missing") }) test_that("slice_*() checks for constant n= and prop=", { dt <- lazy_dt(data.frame(x = 1:10)) expect_error(slice_head(dt, n = n()), "constant") expect_error(slice_head(dt, prop = n()), "constant") expect_error(slice_tail(dt, n = n()), "constant") expect_error(slice_tail(dt, prop = n()), "constant") expect_error(slice_min(dt, x, n = n()), "constant") expect_error(slice_min(dt, x, prop = n()), "constant") expect_error(slice_max(dt, x, n = n()), "constant") expect_error(slice_max(dt, x, prop = n()), "constant") expect_error(slice_sample(dt, n = n()), "constant") expect_error(slice_sample(dt, prop = n()), "constant") }) test_that("check_slice_catches common errors", { expect_snapshot(error = TRUE, { check_slice_size(n = 1, prop = 1) check_slice_size(n = "a") check_slice_size(prop = "a") check_slice_size(n = NA) check_slice_size(prop = NA) }) }) test_that("slice_head/slice_tail correctly slice ungrouped dt when n < 0", { dt <- lazy_dt(data.frame(x = 1:10)) expect_equal( slice_head(dt, n = -2) %>% as_tibble(), slice_head(dt, n = nrow(dt) - 2) %>% as_tibble() ) expect_equal( slice_tail(dt, n = -2) %>% as_tibble(), slice_tail(dt, n = nrow(dt) - 2) %>% as_tibble() ) }) test_that("slice_head/slice_tail correctly slice grouped dt when n < 0", { dt <- data.frame(x = 1:10, g = c(rep(1, 8), rep(2, 2))) %>% lazy_dt() %>% group_by(g) expect_equal( slice_head(dt, n = -3) %>% as_tibble(), slice(dt, rlang::seq2(1L, n() - 3)) %>% as_tibble() ) expect_equal( n_groups(slice_head(dt, n = -3)), 1L ) expect_equal( slice_tail(dt, n = -3) %>% as_tibble(), slice(dt, rlang::seq2(3 + 1, n())) %>% as_tibble() ) expect_equal( n_groups(slice_tail(dt, n = -3)), 1L ) }) test_that("Non-integer number of rows computed correctly", { expect_equal(eval_tidy(get_slice_size(n = 1.6), list(.N = 10)), 1) expect_equal(eval_tidy(get_slice_size(prop = 0.16), list(.N = 10)), 1) expect_equal(eval_tidy(get_slice_size(n = -1.6), list(.N = 10)), 9) expect_equal(eval_tidy(get_slice_size(prop = -0.16), list(.N = 10)), 9) }) # sample ------------------------------------------------------------------ test_that("basic usage generates expected calls", { dt <- lazy_dt(data.table(x = 1:5, y = 1), "DT") expect_equal( dt %>% sample_n(3) %>% show_query(), expr(DT[sample(.N, 3)]) ) expect_equal( dt %>% sample_frac(0.5) %>% show_query(), expr(DT[sample(.N, .N * 0.5)]) ) expect_equal( dt %>% sample_n(3, replace = TRUE) %>% show_query(), expr(DT[sample(.N, 3, replace = TRUE)]) ) expect_equal( dt %>% sample_n(3, weight = y) %>% show_query(), expr(DT[sample(.N, 3, prob = y)]) ) }) 
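# Usage sketch, not a test: the sampling verbs exercised above are normally
# driven interactively via show_query()/collect(). This illustration reuses the
# same constructs as the tests and is wrapped in `if (FALSE)` so it never runs
# during R CMD check; it makes no claim about the exact generated query.
if (FALSE) {
  dt <- lazy_dt(data.table(x = 1:5, y = 1), "DT")
  dt %>% sample_n(3, weight = y) %>% show_query()
  dt %>% slice_sample(n = 3, weight_by = y) %>% collect()
}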
dtplyr/tests/testthat/test-step-colorder.R0000644000176200001440000000307614126601265020465 0ustar liggesuserstest_that("can reorder columns", { dt <- lazy_dt(data.frame(x = 1:3, y = 1), "DT") expect_equal( dt %>% step_colorder(c("y", "x")) %>% show_query(), expr(setcolorder(copy(DT), !!c("y", "x"))) ) expect_named( dt %>% step_colorder(c("y", "x")) %>% collect(), c("y", "x") ) expect_equal( dt %>% step_colorder(c(2L, 1L)) %>% show_query(), expr(setcolorder(copy(DT), !!c(2L, 1L))) ) expect_named( dt %>% step_colorder(c(2L, 1L)) %>% collect(), c("y", "x") ) }) test_that("can handle duplicate column names", { dt <- lazy_dt(data.table(x = 3, x = 2, y = 1), "DT") expect_snapshot_error(dt %>% step_colorder(c("y", "x"))) expect_equal( dt %>% step_colorder(c(3L, 2L)) %>% show_query(), expr(setcolorder(copy(DT), !!c(3L, 2L))) ) expect_equal( dt %>% step_colorder(c(3L, 2L)) %>% as.data.table(), data.table(y = 1, x = 2, x = 3) ) }) test_that("checks col_order", { dt <- lazy_dt(data.frame(x = 1:3, y = 1), "DT") expect_snapshot_error(dt %>% step_colorder(c("y", "y"))) expect_snapshot_error(dt %>% step_colorder(c(1L, 1L))) }) test_that("works for empty input", { dt <- lazy_dt(data.frame(x = 1), "DT") expect_equal(dt %>% step_colorder(character()), dt) expect_equal(dt %>% step_colorder(integer()), dt) }) test_that("doesn't add step if not necessary", { dt <- lazy_dt(data.frame(x = 1, y = 2), "DT") expect_equal(dt %>% step_colorder(c("x", "y")), dt) expect_equal(dt %>% step_colorder("x"), dt) expect_equal(dt %>% step_colorder(1:2), dt) expect_equal(dt %>% step_colorder(1L), dt) }) dtplyr/tests/testthat/test-tidyeval.R0000644000176200001440000002061314127150472017520 0ustar liggesuserstest_that("simple expressions left as is", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) expect_equal(capture_dot(dt, NULL), NULL) expect_equal(capture_dot(dt, 10), 10) expect_equal(capture_dot(dt, x), quote(x)) expect_equal(capture_dot(dt, x + y), quote(x + y)) expect_equal(capture_dot(dt, x[[1]]), quote(x[[1]])) # logicals expect_equal(eval(capture_dot(dt, T), globalenv()), TRUE) expect_equal(eval(capture_dot(dt, F), globalenv()), FALSE) expect_equal(capture_dot(dt, TRUE), TRUE) expect_equal(capture_dot(dt, FALSE), FALSE) }) test_that("existing non-variables get inlined", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) n <- 10 expect_equal(capture_dot(dt, x + n), quote(x + 10)) expect_equal(capture_dot(dt, x + m), quote(x + m)) }) test_that("unless we're operating in the global environment", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) quo <- new_quosure(quote(x + n), globalenv()) expect_equal(capture_dot(dt, !!quo), quote(x + ..n)) expect_equal(capture_dot(dt, !!quo, j = FALSE), quote(x + n)) }) test_that("using environment of inlined quosures", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) n <- 10 quo <- new_quosure(quote(x + n), env(n = 20)) expect_equal(capture_dot(dt, f(!!quo)), quote(f(x + 20))) expect_equal(capture_dot(dt, f(!!quo), j = FALSE), quote(f(x + 20))) }) test_that(". 
gets converted to .SD", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) expect_equal(capture_dot(dt, .), quote(.SD)) expect_equal(capture_dot(dt, .SD), quote(.SD)) }) test_that("translate context functions", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) expect_equal(capture_dot(dt, cur_data()), quote(.SD)) expect_error(capture_dot(dt, cur_data_all()), "not available") expect_equal(capture_dot(dt, cur_group()), quote(.BY)) expect_equal(capture_dot(dt, cur_group_id()), quote(.GRP)) expect_equal(capture_dot(dt, cur_group_rows()), quote(.I)) }) test_that("translates if_else()/ifelse()", { df <- data.frame(x = 1:5) expect_equal( capture_dot(df, ifelse(x < 0, 1, 2)), expr(fifelse(x < 0, 1, 2)) ) expect_equal( capture_dot(df, if_else(x < 0, 1, 2)), expr(fifelse(x < 0, 1, 2)) ) # Handles unusual argument names/order expect_equal( capture_dot(df, ifelse(x < 0, n = 2, yes = 1)), expr(fifelse(x < 0, 1, 2)) ) expect_equal( capture_dot(df, if_else(x < 0, f = 2, true = 1)), expr(fifelse(x < 0, 1, 2)) ) # tidyeval works inside if_else, #220 expect_equal( capture_dot(df, if_else(.data$x < 3, 1, 2)), expr(fifelse(x < 3, 1, 2)) ) }) test_that("translates coalesce()", { df <- data.frame(x = 1:5) expect_equal( capture_dot(df, coalesce(x, 1)), expr(fcoalesce(x, 1)) ) }) test_that("can use local variable with coalesce() and replace_na()", { dt <- lazy_dt(data.frame(x = c(1, NA)), "dt") n <- 10 expect_equal( capture_dot(dt, coalesce(x, n)), expr(fcoalesce(x, 10)) ) expect_equal( capture_dot(dt, replace_na(x, n)), expr(fcoalesce(x, 10)) ) }) test_that("translates case_when()", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) expect_equal( capture_dot(dt, case_when(x1 ~ y1, x2 ~ y2, x3 ~ TRUE, TRUE ~ y4)), quote(fcase(x1, y1, x2, y2, x3, TRUE, rep(TRUE, .N), y4)) ) # can use T for default, #272 expect_equal( capture_dot(dt, case_when(x1 ~ y1, x2 ~ y2, x3 ~ TRUE, T ~ y4)), quote(fcase(x1, y1, x2, y2, x3, TRUE, rep(TRUE, .N), y4)) ) # translates recursively expect_equal( capture_dot(dt, case_when(x == 1 ~ n())), quote(fcase(x == 1, .N)) ) }) test_that("translates lag()/lead()", { df <- data.frame(x = 1:5, y = 1:5) expect_equal( capture_dot(df, lag(x)), expr(shift(x, type = "lag")) ) expect_equal( capture_dot(df, lead(x, 2, default = 3)), expr(shift(x, n = 2, fill = 3, type = "lead")) ) # Errors with order_by expect_snapshot_error( capture_dot(df, lag(x, order_by = y)), ) }) test_that("can use local variable with lag()/lead()", { dt <- lazy_dt(data.frame(x = c(1, NA)), "dt") n <- 10 expect_equal( capture_dot(dt, lag(x, n)), expr(shift(x, n = 10, type = "lag")) ) }) test_that("can process many expressions in one go", { dt <- lazy_dt(data.frame(x = 1:10, y = 1:10)) n <- 10 dots <- capture_dots(dt, x = x + n, y = y) expect_named(dots, c("x", "y")) expect_equal(dots$x, quote(x + 10)) }) test_that("can use anonymous functions", { dt <- lazy_dt(data.frame(x = 1:2, y = 1)) expect_equal( capture_dot(dt, x = sapply(x, function(x) x)) %>% deparse(), "sapply(x, function(x) x)" ) }) test_that("can splice a data frame", { df <- data.frame(b = rep(2, 3), c = rep(3, 3)) dots <- capture_dots(df, !!!df) expect_equal(dots, as.list(df)) }) # evaluation -------------------------------------------------------------- test_that("can access functions in local env", { dt <- lazy_dt(data.frame(g = c(1, 1, 2), x = 1:3)) f <- function(x) 100 expect_equal(dt %>% summarise(n = f()) %>% pull(), 100) }) test_that("can disambiguate using .data and .env", { dt <- lazy_dt(data.frame(x = 1)) x <- 2 expect_equal(capture_dot(dt, .data$x), quote(x)) 
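  # Descriptive note: `.env$x` refers to the calling environment, so dtplyr
  # translates it to data.table's `..x` prefix form, which makes `[.data.table`
  # look the symbol up outside the table instead of among its columns.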
expect_equal(capture_dot(dt, .env$x), quote(..x)) out <- dt %>% summarise(data = .data$x, env = .env$x) %>% as_tibble() expect_equal(out, tibble(data = 1, env = 2)) var <- "x" out <- dt %>% summarise(data = .data[[var]], env = .env[[var]]) %>% collect() expect_equal(out, tibble(data = 1, env = 2)) }) test_that("locals are executed before call", { dt <- lazy_dt(data.frame(x = 1, y = 2)) expect_equal( dt %>% step_locals(exprs(a = 1, b = 2, c = a + b), "c") %>% dt_eval(), 3 ) }) # dplyr verbs ------------------------------------------------------------- test_that("n() is equivalent to .N", { dt <- lazy_dt(data.frame(g = c(1, 1, 2), x = 1:3)) expect_equal( dt %>% summarise(n = n()) %>% pull(), 3L ) expect_equal( dt %>% group_by(g) %>% summarise(n = n()) %>% pull(), c(2L, 1L) ) }) test_that("row_number() is equivalent .I", { dt <- lazy_dt(data.frame(g = c(1, 1, 2), x = 1:3)) expect_equal( dt %>% mutate(n = row_number()) %>% pull(), 1:3L ) expect_equal( dt %>% group_by(g) %>% mutate(n = row_number()) %>% pull(), c(1:2, 1) ) }) test_that("row_number(x) is equivalent to rank", { dt <- lazy_dt(data.frame(x = c(10, 30, 20))) expect_equal( dt %>% mutate(n = row_number(x)) %>% pull(), c(1L, 3L, 2L) ) }) test_that("scoped verbs produce nice output", { dt <- lazy_dt(data.table(x = 1:5), "DT") expect_equal( dt %>% summarise_all(mean) %>% show_query(), expr(DT[, .(x = mean(x))]) ) expect_equal( dt %>% summarise_all(~ mean(.)) %>% show_query(), expr(DT[, .(x = mean(x))]) ) expect_equal( dt %>% summarise_all(row_number) %>% show_query(), expr(DT[, .(x = frank(x, ties.method = "first", na.last = "keep"))]) ) expect_equal( dt %>% summarise_all(~ n()) %>% show_query(), expr(DT[, .(x = .N)]) ) }) test_that("non-Gforce verbs work", { dt <- lazy_dt(data.table(x = 1:2), "DT") add <- function(x) sum(x) expect_equal(dt %>% summarise_at(vars(x), add) %>% pull(), 3) expect_equal(dt %>% mutate_at(vars(x), add) %>% pull(), c(3, 3)) }) test_that("`desc(col)` is translated to `-col` inside arrange", { dt <- lazy_dt(data.table(x = c("a", "b")), "DT") step <- arrange(dt, desc(x)) out <- collect(step) expect_equal(show_query(step), expr(DT[order(-x)])) expect_equal(out$x, c("b", "a")) }) test_that("desc() checks the number of arguments", { expect_snapshot(error = TRUE, capture_dot(df, desc(a, b))) }) test_that("n_distinct() is translated to uniqueN()", { # Works with multiple inputs expect_equal( dt_squash(expr(n_distinct(c(1, 1, 2), c(1, 2, 1)))), expr(uniqueN(data.table(c(1, 1, 2), c(1, 2, 1)))) ) # Works with single column selection (in summarise()) expect_equal( dt_squash(expr(n_distinct(x))), expr(uniqueN(x)) ) dt <- lazy_dt(data.table(x = c("a", "a", "b", NA)), "DT") step <- summarise(dt, num = n_distinct(x, na.rm = TRUE)) out <- collect(step) expect_equal( show_query(step), expr(DT[, .(num = uniqueN(x, na.rm = TRUE))]) ) expect_equal(out$num, 2) }) # fun_name ---------------------------------------------------------------- test_that("finds name of functions with GForce implementations", { expect_equal(fun_name(mean), expr(mean)) # unless overridden mean <- function() {} expect_equal(fun_name(mean), NULL) }) dtplyr/tests/testthat/test-step-group.R0000644000176200001440000000620314150757413020007 0ustar liggesuserstest_that("grouping and ungrouping adjust groups field", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% .$groups, character()) expect_equal(dt %>% group_by(x) %>% .$groups, "x") expect_equal(dt %>% group_by(a = x) %>% .$groups, "a") expect_equal(dt %>% group_by(x) %>% group_by(y) %>% 
.$groups, "y") expect_equal(dt %>% group_by(x) %>% ungroup() %>% .$groups, character()) }) test_that("ungroup can remove variables from grouping", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) %>% group_by(x, y) expect_equal(dt %>% ungroup(y) %>% group_vars(), "x") }) test_that("can use across", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% group_by(across(everything())) %>% .$groups, c("x", "y")) }) test_that("can add groups if requested", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") expect_equal( dt %>% group_by(x) %>% group_by(y, .add = TRUE) %>% .$groups, c("x", "y") ) expect_snapshot({ . <- dt %>% group_by(x) %>% group_by(y, add = TRUE) }) }) test_that("grouping can compute new variables if needed", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") expect_equal( dt %>% group_by(xy = x + y) %>% show_query(), expr(copy(DT)[, `:=`(xy = x + y)]) ) # also works when RHS is only a symbol expect_equal( dt %>% group_by(z = x) %>% show_query(), expr(copy(DT)[, `:=`(z = x)]) ) expect_equal( dt %>% group_by(xy = x + y) %>% summarise(x = mean(x)) %>% show_query(), expr(copy(DT)[, `:=`(xy = x + y)][, .(x = mean(x)), keyby = .(xy)]) ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% group_by(x) %>% .$vars, c("x", "y")) }) test_that("`key` switches between keyby= and by=", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") dt1 <- lazy_dt(mtcars, "DT1") expect_equal( dt %>% group_by(xy = x + y, arrange = FALSE) %>% summarize(x = mean(x)) %>% show_query(), expr(copy(DT)[, `:=`(xy = x + y)][, .(x = mean(x)), by = .(xy)]) ) expect_equal( dt1 %>% group_by(cyl, arrange = FALSE) %>% summarize(mean_mpg = mean(mpg)) %>% show_query(), expr(DT1[, .(mean_mpg = mean(mpg)), by = .(cyl)]) ) expect_equal( dt1 %>% group_by(cyl) %>% summarize(mean_mpg = mean(mpg)) %>% show_query(), expr(DT1[, .(mean_mpg = mean(mpg)), keyby = .(cyl)]) ) }) test_that("emtpy and NULL group_by ungroups", { dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x) expect_equal(group_by(dt) %>% group_vars(), character()) expect_equal(group_by(dt, NULL) %>% group_vars(), character()) expect_equal(group_by(dt, !!!list()) %>% group_vars(), character()) }) test_that("only adds step if necessary", { dt <- lazy_dt(data.table(x = 1, y = 1), "DT") expect_equal(dt %>% group_by(), dt) expect_equal(dt %>% ungroup(), dt) expect_equal(dt %>% ungroup(x), dt) dt_grouped <- dt %>% group_by(x) dt_grouped2 <- dt_grouped %>% group_by(x) expect_equal(dt_grouped, dt_grouped2) expect_equal(dt_grouped %>% ungroup(y), dt_grouped) out <- dt_grouped %>% mutate(y = y - mean(y)) %>% group_by() expect_s3_class(out, "dtplyr_step_group") expect_equal(group_vars(out), character()) }) dtplyr/tests/testthat/test-step-subset-arrange.R0000644000176200001440000000255314126601265021575 0ustar liggesuserstest_that("arrange orders variables", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% arrange(x) %>% show_query(), expr(DT[order(x)]) ) }) test_that("arrange doesn't use, but still preserves, grouping", { dt <- group_by(lazy_dt(data.table(x = 1, y = 2), "DT"), x) step <- arrange(dt, y) expect_equal(step$groups, "x") expect_equal(dt_call(step), expr(DT[order(y)])) step2 <- arrange(dt, y, .by_group = TRUE) expect_equal(dt_call(step2), expr(DT[order(x, y)])) }) test_that("empty arrange returns input unchanged", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_true(identical(arrange(dt), dt)) }) test_that("can use with across", { dt <- lazy_dt(data.table(x = 1, y = 1, z 
= 1), "DT") expect_equal( dt %>% arrange(across(x:y)) %>% show_query(), expr(DT[order(x, y)]) ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% arrange(x) %>% .$vars, c("x", "y")) }) test_that("desc works with internal quosure", { dt <- lazy_dt(data.table(x = c(4,3,9,7), y = 1:4)) desc_df <- dt %>% arrange(desc(!!quo(x))) %>% collect() expect_equal(desc_df$x, c(9,7,4,3)) }) test_that("only add step if necessary", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% arrange(), dt) expect_equal(dt %>% arrange(!!!list()), dt) }) dtplyr/tests/testthat/test-replace_na.R0000644000176200001440000000210314021443044017753 0ustar liggesusers# lazy data.tables ----------------------------------------------------------- test_that("empty call does nothing", { tbl <- tibble(x = c(1, NA)) dt <- lazy_dt(tbl, "DT") out <- collect(replace_na(dt)) expect_equal(out, tbl) }) test_that("missing values are replaced", { tbl <- tibble(x = c(1, NA)) dt <- lazy_dt(tbl, "DT") step <- replace_na(dt, list(x = 0)) out <- collect(step) expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = fcoalesce(x, 0))])) expect_equal(out$x, c(1, 0)) }) test_that("don't complain about variables that don't exist", { tbl <- tibble(a = c(1, NA)) dt <- lazy_dt(tbl, "DT") out <- collect(replace_na(dt, list(a = 100, b = 0))) expect_equal(out, tibble(a = c(1, 100))) }) # Inside mutate() ----------------------------------------------------------- test_that("missing values are replaced", { tbl <- tibble(x = c(1, NA)) dt <- lazy_dt(tbl, "DT") step <- mutate(dt, x = replace_na(x, 0)) out <- collect(step) expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = fcoalesce(x, 0))])) expect_equal(out$x, c(1, 0)) }) dtplyr/tests/testthat/test-step-subset-do.R0000644000176200001440000000052414006775461020563 0ustar liggesuserstest_that("basic operation as expected", { dt <- lazy_dt(data.frame(g = c(1, 1, 2), x = 1:3), "DT") expect_equal( dt %>% do(y = ncol(.)) %>% show_query(), expr(DT[, .(y = .(ncol(.SD)))]) ) expect_equal( dt %>% group_by(g) %>% do(y = ncol(.)) %>% show_query(), expr(DT[, .(y = .(ncol(.SD))), keyby = .(g)]) ) }) dtplyr/tests/testthat/test-step-subset-filter.R0000644000176200001440000000573214126601265021445 0ustar liggesuserstest_that("can filter by value", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% filter() %>% show_query(), expr(DT) ) expect_equal( dt %>% filter(x) %>% show_query(), expr(DT[(x)]) ) expect_equal( dt %>% filter(x > 1) %>% show_query(), expr(DT[x > 1]) ) expect_equal( dt %>% filter(x > 1, y > 2) %>% show_query(), expr(DT[x > 1 & y > 2]) ) }) test_that("can filter with logical columns", { dt <- lazy_dt(data.table(x = c(TRUE, FALSE)), "DT") expect_equal( dt %>% filter(x) %>% show_query(), expr(DT[(x)]) ) expect_equal( dt %>% filter(!x) %>% show_query(), expr(DT[(!x)]) ) }) test_that("inlines external variables", { dt <- lazy_dt(data.table(x = 1), "DT") l <- c(1, 10) expect_equal( dt %>% filter(x %in% l) %>% show_query(), quote(DT[x %in% !!l]) ) # Except in the global environment # But I can't figure out how to test this - it's not too important # as it only affects the quality of the translation not the correctness }) test_that("can use with across", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( dt %>% filter(across(x:y, ~ . > 0)) %>% show_query(), expr(DT[x > 0 & y > 0]) ) expect_equal( dt %>% filter(if_all(x:y, ~ . 
> 0)) %>% show_query(), expr(DT[x > 0 & y > 0]) ) expect_equal( dt %>% filter(if_any(x:y, ~ . > 0)) %>% show_query(), expr(DT[x > 0 | y > 0]) ) # .cols defaults to everything() expect_equal( dt %>% filter(if_all(.fns = ~ . > 0)) %>% show_query(), expr(DT[x > 0 & y > 0 & z > 0]) ) expect_equal( dt %>% filter(if_any(.fns = ~ . > 0)) %>% show_query(), expr(DT[x > 0 | y > 0 | z > 0]) ) }) test_that("can filter when grouped", { dt1 <- lazy_dt(data.table(x = c(1, 1, 2, 2), y = c(1, 2, 3, 4)), "DT") dt2 <- dt1 %>% group_by(x) %>% filter(sum(y) == 3) expect_equal( dt2 %>% show_query(), expr(DT[DT[, .I[sum(y) == 3], by = .(x)]$V1]) ) expect_equal(as_tibble(dt2), tibble(x = c(1, 1), y = c(1, 2))) }) test_that("grouped filter doesn't reorder", { dt1 <- lazy_dt(data.frame(x = c(2, 2, 1, 1), y = 1:4), "DT") dt2 <- dt1 %>% group_by(x) %>% filter(TRUE) expect_equal( dt2 %>% show_query(), expr(DT[DT[, .I[TRUE], by = .(x)]$V1]) ) expect_equal(dt2 %>% as_tibble(), as_tibble(dt1)) }) test_that("only adds step if dots are not empty", { dt <- lazy_dt(data.table(x = 1), "DT") expect_equal(dt %>% filter(), dt) expect_equal(dt %>% filter(!!!list()), dt) }) test_that("errors for named input", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_snapshot(error = TRUE, filter(dt, x = 1)) expect_snapshot(error = TRUE, filter(dt, y > 1, x = 1)) }) test_that("allows named constants that resolve to logical vectors", { dt <- lazy_dt(mtcars, "DT") filters <- mtcars %>% transmute( cyl %in% 6:8, hp / drat > 50 ) expect_equal( filter(dt, !!!filters), filter(dt, !!!unname(filters)) ) }) dtplyr/tests/testthat.R0000644000176200001440000000007012667402221014714 0ustar liggesuserslibrary(testthat) library(dtplyr) test_check("dtplyr") dtplyr/vignettes/0000755000176200001440000000000014172104725013603 5ustar liggesusersdtplyr/vignettes/translation.Rmd0000644000176200001440000001764414126601265016621 0ustar liggesusers--- title: "Translation" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{translation} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ## Introduction This vignette shows the details of how dtplyr translates dplyr expressions into the equivalent [data.table](http://r-datatable.com/) code. If you see places where you think I could generate better data.table code, please [let me know](https://github.com/tidyverse/dtplyr/issues)! This document assumes that you're familiar with the basics of data.table; if you're not, I recommend starting at `vignette("datatable-intro.html")`. ```{r setup, message = FALSE} library(dtplyr) library(data.table) library(dplyr) ``` ## The basics To get started, I'll create a simple lazy table with `lazy_dt()`: ```{r} df <- data.frame(a = 1:5, b = 1:5, c = 1:5, d = 1:5) dt <- lazy_dt(df) ``` The actual data doesn't matter here since we're just looking at the translation. When you print a lazy frame, it tells you that it's a local data table with four rows. It also prints the call that dtplyr will evaluate when we execute the lazy table. In this case it's very simple: ```{r} dt ``` If we just want to see the generated code, you can use `show_query()`. I'll use that a lot in this vignette. ```{r} dt %>% show_query() ``` ## Simple verbs Many dplyr verbs have a straightforward translation to either the `i` or `j` component of `[.data.table`. 
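As a reminder of the anatomy those translations target, a data.table call has the general form `DT[i, j, by]`: `i` selects rows, `j` computes on columns, and `by` (or `keyby`) supplies the grouping. The chunk below is a plain data.table illustration of that form; `dt_raw` is just a hypothetical copy of `df` used for illustration, not something dtplyr generates. The sections that follow show which dplyr verbs map to which slot.

```{r}
dt_raw <- as.data.table(df)
dt_raw[a >= 2, .(b = mean(b)), keyby = .(d)]
```
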
### `filter()` and `arrange()` `filter()` and `arrange()` become elements of `i`: ```{r} dt %>% arrange(a, b, c) %>% show_query() dt %>% filter(b == c) %>% show_query() dt %>% filter(b == c, c == d) %>% show_query() ``` ### `select()`, `summarise()`, `transmute()` `select()`, `summarise()` and `transmute()` all become elements of `j`: ```{r} dt %>% select(a:b) %>% show_query() dt %>% summarise(a = mean(a)) %>% show_query() dt %>% transmute(a2 = a * 2) %>% show_query() ``` `mutate()` also uses the `j` component with data.table's special `:=` operator: ```{r} dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` Note that dplyr will not copy the input data by default, see below for more details. `mutate()` allows to refer to variables that you just created using an "extended `j`" expression: ```{r} dt %>% mutate(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query() ``` `transmute()` works similarly: ```{r} dt %>% transmute(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query() ``` ## Other calls Other verbs require calls to other functions: ### `rename()` `rename()` uses `setnames()`: ```{r} dt %>% rename(x = a, y = b) %>% show_query() ``` ### `distinct()` `distinct()` uses `unique()`: ```{r} dt %>% distinct() %>% show_query() dt %>% distinct(a, b) %>% show_query() dt %>% distinct(a, b, .keep_all = TRUE) %>% show_query() ``` `distinct()` on a computed column uses an intermediate mutate: ```{r} dt %>% distinct(c = a + b) %>% show_query() dt %>% distinct(c = a + b, .keep_all = TRUE) %>% show_query() ``` ### Joins Most joins use the `[.data.table` equivalent: ```{r} dt2 <- lazy_dt(data.frame(a = 1)) dt %>% inner_join(dt2, by = "a") %>% show_query() dt %>% right_join(dt2, by = "a") %>% show_query() dt %>% left_join(dt2, by = "a") %>% show_query() dt %>% anti_join(dt2, by = "a") %>% show_query() ``` But `full_join()` uses `merge()` ```{r} dt %>% full_join(dt2, by = "a") %>% show_query() ``` In some case extra calls to `data.table::setcolorder()` and `data.table::setnames()` are required to ensure correct column order and names in: ```{r} dt3 <- lazy_dt(data.frame(b = 1, a = 1)) dt %>% left_join(dt3, by = "a") %>% show_query() dt %>% full_join(dt3, by = "b") %>% show_query() ``` Semi-joins are little more complex: ```{r} dt %>% semi_join(dt2, by = "a") %>% show_query() ``` ### Set operations Set operations use the fast data.table alternatives: ```{r} dt %>% intersect(dt2) %>% show_query() dt %>% setdiff(dt2) %>% show_query() dt %>% union(dt2) %>% show_query() ``` ## Grouping Just like in dplyr, `group_by()` doesn't do anything by itself, but instead modifies the operation of downstream verbs. This generally just involves using the `keyby` argument: ```{r} dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query() ``` You may use `by` instead of `keyby` if you set `arrange = FALSE`: ```{r} dt %>% group_by(a, arrange = FALSE) %>% summarise(b = mean(b)) %>% show_query() ``` Often, there won't be too much of a difference between these, but for larger grouped operations, the overhead of reordering data may become significant. In these situations, using `arrange = FALSE` becomes preferable. The primary exception is grouped `filter()`, which requires the use of `.SD`: ```{r} dt %>% group_by(a) %>% filter(b < mean(b)) %>% show_query() ``` ## Combinations dtplyr tries to generate generate data.table code as close as possible to what you'd write by hand, as this tends to unlock data.table's tremendous speed. 
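A quick way to check whether your own pipeline fuses steps is to call `show_query()` and count the `[` calls. As a small illustration (reusing the same lazy table `dt` from above), a `filter()` followed by a `summarise()` collapses to a single `[`:

```{r}
dt %>%
  filter(a > 2) %>%
  summarise(b = mean(b)) %>%
  show_query()
```
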
For example, if you `filter()` and then `select()`, dtplyr generates a single `[`: ```{r} dt %>% filter(a == 1) %>% select(-a) %>% show_query() ``` And similarly when combining filtering and summarising: ```{r} dt %>% group_by(a) %>% filter(b < mean(b)) %>% summarise(c = max(c)) %>% show_query() ``` This is particularly nice when joining two tables together because you can select variables after you have joined and data.table will only carry those into the join: ```{r} dt3 <- lazy_dt(data.frame(x = 1, y = 2)) dt4 <- lazy_dt(data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 7)) dt3 %>% left_join(dt4) %>% select(x, a:c) %>% show_query() ``` Note, however, that `select()`ing and then `filter()`ing must generate two separate calls to `[`, because data.table evaluates `i` before `j`. ```{r} dt %>% select(X = a, Y = b) %>% filter(X == 1) %>% show_query() ``` Similarly, a `filter()` and `mutate()` can't be combined because `dt[a == 1, .(b2 := b * 2)]` would modify the selected rows in place: ```{r} dt %>% filter(a == 1) %>% mutate(b2 = b * 2) %>% show_query() ``` ## Copies By default dtplyr avoids mutating the input data, automatically creating a `copy()` if needed: ```{r} dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` Note that dtplyr does its best to avoid needless copies, so it won't explicitly copy if there's already an implicit copy produced by `[`, `head()`, `merge()` or similar: ```{r} dt %>% filter(x == 1) %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` You can choose to opt out of this copy, and take advantage of data.table's reference semantics (see `vignette("datatable-reference-semantics")` for more details). Do this by setting `immutable = FALSE` on construction: ```{r} dt2 <- data.table(a = 1:10) dt_inplace <- lazy_dt(dt2, immutable = FALSE) dt_inplace %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` ## Performance There are two components to the performance of dtplyr: how long it takes to generate the translation, and how well the translation performs. Given my explorations so far, I'm reasonably confident that we're generating high-quality data.table code, so most of the cost should be in the translation itself. The following code briefly explores the performance of a few different translations. A signficant amount of work is done by the dplyr verbs, so we benchmark the whole process. ```{r} bench::mark( filter = dt %>% filter(a == b, c == d), mutate = dt %>% mutate(a = a * 2, a4 = a2 * 2, a8 = a4 * 2) %>% show_query(), summarise = dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query(), check = FALSE )[1:6] ``` These translations all take less than a millisecond, suggesting that the performance overhead of dtplyr should be negligible for realistic data sizes. Note that dtplyr run-time scales with the complexity of the pipeline, not the size of the data, so these timings should apply regardless of the size of the underlying data[^copy]. [^copy]: Unless a copy is performed. dtplyr/R/0000755000176200001440000000000014150760302011767 5ustar liggesusersdtplyr/R/count.R0000644000176200001440000000353014126601265013250 0ustar liggesusers#' Count observations by group #' #' This is a method for the dplyr [count()] generic. It is translated using #' `.N` in the `j` argument, and supplying groups to `keyby` as appropriate. 
#' #' @param .data A [lazy_dt()] #' @inheritParams dplyr::count #' @importFrom dplyr count #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(dplyr::starwars) #' dt %>% count(species) #' dt %>% count(species, sort = TRUE) #' dt %>% count(species, wt = mass, sort = TRUE) count.dtplyr_step <- function(.data, ..., wt = NULL, sort = FALSE, name = NULL) { if (!missing(...)) { out <- group_by(.data, ..., .add = TRUE) } else { out <- .data } tally(out, wt = !!enquo(wt), sort = sort, name = name) } #' @export count.data.table <- function(.data, ...) { .data <- lazy_dt(.data) count(.data, ...) } #' @importFrom dplyr tally #' @export tally.dtplyr_step <- function(.data, wt = NULL, sort = FALSE, name = NULL) { wt <- enquo(wt) if (quo_is_null(wt)) { n <- expr(n()) } else { n <- expr(sum(!!wt, na.rm = TRUE)) } name <- check_name(name, .data$groups) out <- summarise(.data, !!name := !!n) if (sort) { out <- arrange(out, desc(!!sym(name))) } out } #' @export tally.data.table <- function(.data, ...) { .data <- lazy_dt(.data) tally(.data, ...) } # Helpers ----------------------------------------------------------------- check_name <- function(name, vars) { if (is.null(name)) { name <- n_name(vars) if (name != "n") { inform(c( glue::glue("Storing counts in `{name}`, as `n` already present in input"), i = "Use `name = \"new_name\"` to pick a new name." )) } } else if (!is_string(name)) { abort("`name` must be a string") } name } n_name <- function(x) { name <- "n" while (name %in% x) { name <- paste0("n", name) } name } dtplyr/R/step-setnames.R0000644000176200001440000000151314126601265014707 0ustar liggesusersstep_setnames <- function(x, old, new, in_place, rename_groups = FALSE) { stopifnot(is_step(x)) stopifnot(is.character(old) || is.integer(old)) stopifnot(is.character(new)) stopifnot(length(old) == length(new)) stopifnot(is_bool(in_place)) stopifnot(is_bool(rename_groups)) if (is.integer(old)) { locs <- old } else { locs <- vctrs::vec_match(old, x$vars) } name_changed <- x$vars[locs] != new old <- old[name_changed] new <- new[name_changed] locs <- locs[name_changed] if (length(old) == 0) { return(x) } new_vars <- x$vars new_vars[locs] <- new out <- step_call(x, "setnames", args = list(old, new), vars = new_vars, in_place = in_place ) if (rename_groups) { groups <- rename_groups(x$groups, set_names(old, new)) out <- step_group(out, groups) } out } dtplyr/R/step-mutate.R0000644000176200001440000000717314126601265014377 0ustar liggesusersstep_mutate <- function(parent, new_vars = list(), nested = FALSE) { vars <- union(parent$vars, names(new_vars)) vars <- setdiff(vars, names(new_vars)[vapply(new_vars, is_null, lgl(1))]) new_step( parent, vars = vars, groups = parent$groups, arrange = parent$arrange, needs_copy = !parent$implicit_copy, new_vars = new_vars, nested = nested, class = "dtplyr_step_mutate" ) } dt_call.dtplyr_step_mutate <- function(x, needs_copy = x$needs_copy) { # i is always empty because we never mutate a subset if (is_empty(x$new_vars)) { j <- quote(.SD) } else if (!x$nested) { j <- call2(":=", !!!x$new_vars) } else { mutate_list <- mutate_nested_vars(x$new_vars) j <- call2(":=", call2("c", !!!mutate_list$new_vars), mutate_list$expr) } out <- call2("[", dt_call(x$parent, needs_copy), , j) add_grouping_param(out, x, arrange = FALSE) } mutate_nested_vars <- function(mutate_vars) { assign <- map2(syms(names(mutate_vars)), mutate_vars, function(x, y) call2("<-", x, y)) new_vars <- unique(names(mutate_vars)) output <- call2(".", !!!syms(new_vars)) list( expr = 
call2("{", !!!assign, output), new_vars = new_vars ) } # dplyr methods ----------------------------------------------------------- #' Create and modify columns #' #' This is a method for the dplyr [mutate()] generic. It is translated to #' the `j` argument of `[.data.table`, using `:=` to modify "in place". If #' `.before` or `.after` is provided, the new columns are relocated with a call #' to [data.table::setcolorder()]. #' #' @param .data A [lazy_dt()]. #' @param ... <[data-masking][dplyr::dplyr_data_masking]> Name-value pairs. #' The name gives the name of the column in the output, and the value should #' evaluate to a vector. #' @param .before,.after \Sexpr[results=rd]{lifecycle::badge("experimental")} #' <[`tidy-select`][dplyr_tidy_select]> Optionally, control where new columns #' should appear (the default is to add to the right hand side). See #' [relocate()] for more details. #' @importFrom dplyr mutate #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(data.frame(x = 1:5, y = 5:1)) #' dt %>% #' mutate(a = (x + y) / 2, b = sqrt(x^2 + y^2)) #' #' # It uses a more sophisticated translation when newly created variables #' # are used in the same expression #' dt %>% #' mutate(x1 = x + 1, x2 = x1 + 1) mutate.dtplyr_step <- function(.data, ..., .before = NULL, .after = NULL) { dots <- capture_dots(.data, ...) if (is_null(dots)) { return(.data) } nested <- nested_vars(.data, dots, .data$vars) out <- step_mutate(.data, dots, nested) .before <- enquo(.before) .after <- enquo(.after) if (!quo_is_null(.before) || !quo_is_null(.after)) { # Only change the order of new columns new <- setdiff(names(dots), .data$vars) out <- relocate(out, !!new, .before = !!.before, .after = !!.after) } out } #' @export mutate.data.table <- function(.data, ...) { .data <- lazy_dt(.data) mutate(.data, ...) } nested_vars <- function(.data, dots, all_vars) { new_vars <- character() all_new_vars <- unique(names(dots)) init <- 0L for (i in seq_along(dots)) { cur_var <- names(dots)[[i]] used_vars <- all_names(get_expr(dots[[i]])) if (any(used_vars %in% new_vars)) { return(TRUE) } else { new_vars <- c(new_vars, cur_var) } } FALSE } # Helpers ----------------------------------------------------------------- all_names <- function(x) { if (is.name(x)) return(as.character(x)) if (!is.call(x)) return(NULL) unique(unlist(lapply(x[-1], all_names), use.names = FALSE)) } dtplyr/R/utils.R0000644000176200001440000000137614126601265013266 0ustar liggesuserscat_line <- function(...) cat(paste(..., "\n", collapse = "", sep = "")) # nocov start - compat-purrr.R imap <- function(.x, .f, ...) { map2(.x, names(.x) %||% seq_along(.x), .f, ...) } map2 <- function(.x, .y, .f, ...) { .f <- as_function(.f, env = global_env()) out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE) if (length(out) == length(.x)) { set_names(out, names(.x)) } else { set_names(out, NULL) } } # nocov end # nocov start - compat-tidyr.R strip_names <- function(df, base, names_sep) { base <- paste0(base, names_sep) names <- names(df) has_prefix <- startsWith(names, base) names[has_prefix] <- substr(names[has_prefix], nchar(base) + 1, nchar(names[has_prefix])) set_names(df, names) } # nocov end dtplyr/R/zzz.R0000644000176200001440000000423214126601265012755 0ustar liggesusers# nocov start .onLoad <- function(...) 
{ register_s3_method("dplyr", "filter", "data.table") register_s3_method("dplyr", "intersect", "data.table") register_s3_method("dplyr", "setdiff", "data.table") register_s3_method("dplyr", "union", "data.table") register_s3_method("tidyr", "complete", "data.table") register_s3_method("tidyr", "drop_na", "data.table") register_s3_method("tidyr", "expand", "data.table") register_s3_method("tidyr", "fill", "data.table") register_s3_method("tidyr", "pivot_longer", "data.table") register_s3_method("tidyr", "pivot_wider", "data.table") register_s3_method("tidyr", "replace_na", "data.table") register_s3_method("tidyr", "nest", "data.table") register_s3_method("tidyr", "separate", "data.table") register_s3_method("dplyr", "filter", "dtplyr_step") register_s3_method("dplyr", "intersect", "dtplyr_step") register_s3_method("dplyr", "setdiff", "dtplyr_step") register_s3_method("dplyr", "union", "dtplyr_step") register_s3_method("tidyr", "complete", "dtplyr_step") register_s3_method("tidyr", "drop_na", "dtplyr_step") register_s3_method("tidyr", "expand", "dtplyr_step") register_s3_method("tidyr", "fill", "dtplyr_step") register_s3_method("tidyr", "pivot_longer", "dtplyr_step") register_s3_method("tidyr", "pivot_wider", "dtplyr_step") register_s3_method("tidyr", "replace_na", "dtplyr_step") register_s3_method("tidyr", "nest", "dtplyr_step") register_s3_method("tidyr", "separate", "dtplyr_step") } register_s3_method <- function(pkg, generic, class, fun = NULL) { stopifnot(is.character(pkg), length(pkg) == 1) stopifnot(is.character(generic), length(generic) == 1) stopifnot(is.character(class), length(class) == 1) if (is.null(fun)) { fun <- get(paste0(generic, ".", class), envir = parent.frame()) } else { stopifnot(is.function(fun)) } if (pkg %in% loadedNamespaces()) { registerS3method(generic, class, fun, envir = asNamespace(pkg)) } # Always register hook in case package is later unloaded & reloaded setHook( packageEvent(pkg, "onLoad"), function(...) { registerS3method(generic, class, fun, envir = asNamespace(pkg)) } ) } # nocov end dtplyr/R/dtplyr-package.R0000644000176200001440000000041314150760302015017 0ustar liggesusers#' @import rlang #' @importFrom data.table data.table as.data.table is.data.table #' @importFrom lifecycle deprecated #' @importFrom glue glue #' @keywords internal "_PACKAGE" #' @export .datatable.aware <- TRUE globalVariables(c(".SD", ".N", ".BY", ".I", "desc")) dtplyr/R/step-set.R0000644000176200001440000000467514126601265013677 0ustar liggesusersstep_set <- function(x, y, style) { stopifnot(is_step(x)) stopifnot(is_step(y)) stopifnot(is.character(style)) new_step( parent = x, parent2 = y, locals = utils::modifyList(x$locals, y$locals), style = style, class = "dtplyr_step_set", ) } #' @export dt_sources.dtplyr_step_set <- function(x) { dt_sources.dtplyr_step_join(x) } #' @export dt_call.dtplyr_step_set <- function(x, needs_copy = x$needs_copy) { lhs <- dt_call(x$parent, needs_copy) rhs <- dt_call(x$parent2) call <- switch(x$style, intersect = call2("fintersect", lhs, rhs), union = call2("funion", lhs, rhs), union_all = call2("funion", lhs, rhs, all = TRUE), setdiff = call2("fsetdiff", lhs, rhs), ) call } # dplyr verbs ------------------------------------------------------------- #' Set operations #' #' These are methods for the dplyr generics [intersect()], [union()], #' [union_all()], and [setdiff()]. They are translated to #' [data.table::fintersect()], [data.table::funion()], and #' [data.table::fsetdiff()]. #' #' @importFrom dplyr intersect #' @param x,y A pair of [lazy_dt()]s. 
#' @param ... Ignored #' @examples #' dt1 <- lazy_dt(data.frame(x = 1:4)) #' dt2 <- lazy_dt(data.frame(x = c(2, 4, 6))) #' #' intersect(dt1, dt2) #' union(dt1, dt2) #' setdiff(dt1, dt2) #' # Exported onload intersect.dtplyr_step <- function(x, y, ...) { if (!is_step(y)) { y <- lazy_dt(y) } step_set(x, y, style = "intersect") } # Exported onload intersect.data.table <- function(x, y, ...) { x <- lazy_dt(x) intersect(x, y, ...) } #' @importFrom dplyr union #' @rdname intersect.dtplyr_step # Exported onload union.dtplyr_step <- function(x, y, ...) { if (!is_step(y)) { y <- lazy_dt(y) } step_set(x, y, style = "union") } # Exported onload union.data.table <- function(x, y, ...) { x <- lazy_dt(x) union(x, y, ...) } #' @importFrom dplyr union_all #' @rdname intersect.dtplyr_step #' @export union_all.dtplyr_step <- function(x, y, ...) { if (!is_step(y)) { y <- lazy_dt(y) } step_set(x, y, style = "union_all") } #' @export union_all.data.table <- function(x, y, ...) { x <- lazy_dt(x) union_all(x, y, ...) } #' @importFrom dplyr setdiff #' @rdname intersect.dtplyr_step # Exported onload setdiff.dtplyr_step <- function(x, y, ...) { if (!is_step(y)) { y <- lazy_dt(y) } step_set(x, y, style = "setdiff") } # Exported onload setdiff.data.table <- function(x, y, ...) { x <- lazy_dt(x) setdiff(x, y, ...) } dtplyr/R/step-subset-filter.R0000644000176200001440000000342314126601265015662 0ustar liggesusers #' Subset rows using column values #' #' This is a method for the dplyr [filter()] generic. It is translated to #' the `i` argument of `[.data.table` #' #' @param .data A [lazy_dt()]. #' @param .preserve Ignored #' @inheritParams dplyr::filter #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(mtcars) #' dt %>% filter(cyl == 4) #' dt %>% filter(vs, am) #' #' dt %>% #' group_by(cyl) %>% #' filter(mpg > mean(mpg)) #' @importFrom dplyr filter # exported onLoad filter.dtplyr_step <- function(.data, ..., .preserve = FALSE) { check_filter(...) dots <- capture_dots(.data, ..., .j = FALSE) if (filter_by_lgl_col(dots)) { # Suppress data.table warning when filtering with a logical variable i <- call2("(", dots[[1]]) } else { i <- Reduce(function(x, y) call2("&", x, y), dots) } step_subset_i(.data, i) } filter_by_lgl_col <- function(dots) { if (length(dots) > 1) { return(FALSE) } dot <- dots[[1]] if (is_symbol(dot)) { return(TRUE) } # catch expressions of form `!x` is_call(dot, name = "!", n = 1) && is_symbol(dot[[2]]) } # exported onLoad filter.data.table <- function(.data, ...) { .data <- lazy_dt(.data) filter(.data, ...) } check_filter <- function(...) { dots <- enquos(...) named <- have_name(dots) for (i in which(named)) { quo <- dots[[i]] # only allow named logical vectors, anything else # is suspicious expr <- quo_get_expr(quo) if (!is.logical(expr)) { abort(c( glue::glue("Problem with `filter()` input `..{i}`."), x = glue::glue("Input `..{i}` is named."), i = glue::glue("This usually means that you've used `=` instead of `==`."), i = glue::glue("Did you mean `{name} == {as_label(expr)}`?", name = names(dots)[i]) )) } } } dtplyr/R/step-subset-expand.R0000644000176200001440000000717414150760302015656 0ustar liggesusers#' Expand data frame to include all possible combinations of values. #' #' @description #' This is a method for the tidyr `expand()` generic. It is translated to #' [data.table::CJ()]. #' #' @param ... Specification of columns to expand. Columns can be atomic vectors #' or lists. 
#' #' * To find all unique combinations of `x`, `y` and `z`, including those not #' present in the data, supply each variable as a separate argument: #' `expand(df, x, y, z)`. #' * To find only the combinations that occur in the #' data, use `nesting`: `expand(df, nesting(x, y, z))`. #' * You can combine the two forms. For example, #' `expand(df, nesting(school_id, student_id), date)` would produce #' a row for each present school-student combination for all possible #' dates. #' #' Unlike the data.frame method, this method does not use the full set of #' levels, just those that appear in the data. #' #' When used with continuous variables, you may need to fill in values #' that do not appear in the data: to do so use expressions like #' `year = 2010:2020` or `year = full_seq(year,1)`. #' @param data A [lazy_dt()]. #' @inheritParams tidyr::expand #' @examples #' library(tidyr) #' #' fruits <- lazy_dt(tibble( #' type = c("apple", "orange", "apple", "orange", "orange", "orange"), #' year = c(2010, 2010, 2012, 2010, 2010, 2012), #' size = factor( #' c("XS", "S", "M", "S", "S", "M"), #' levels = c("XS", "S", "M", "L") #' ), #' weights = rnorm(6, as.numeric(size) + 2) #' )) #' #' # All possible combinations --------------------------------------- #' # Note that only present levels of the factor variable `size` are retained. #' fruits %>% expand(type) #' fruits %>% expand(type, size) #' #' # This is different from the data frame behaviour: #' fruits %>% dplyr::collect() %>% expand(type, size) #' #' # Other uses ------------------------------------------------------- #' fruits %>% expand(type, size, 2010:2012) #' #' # Use `anti_join()` to determine which observations are missing #' all <- fruits %>% expand(type, size, year) #' all #' all %>% dplyr::anti_join(fruits) #' #' # Use with `right_join()` to fill in missing rows #' fruits %>% dplyr::right_join(all) # exported onLoad expand.dtplyr_step <- function(data, ..., .name_repair = "check_unique") { dots <- capture_dots(data, ..., .j = FALSE) dots <- dots[!vapply(dots, is_null, logical(1))] if (length(dots) == 0) { return(data) } named_dots <- have_name(dots) if (any(!named_dots)) { # Auto-names generated by enquos() don't always work with the CJ() step ## Ex: `1:3` # Replicates the "V" naming convention data.table uses symbol_dots <- vapply(dots, is_symbol, logical(1)) needs_v_name <- !symbol_dots & !named_dots v_names <- paste0("V", 1:length(dots)) names(dots)[needs_v_name] <- v_names[needs_v_name] names(dots)[symbol_dots] <- lapply(dots[symbol_dots], as_name) } names(dots) <- vctrs::vec_as_names(names(dots), repair = .name_repair) dots_names <- names(dots) out <- step_subset_j( data, vars = union(data$groups, dots_names), j = expr(CJ(!!!dots, unique = TRUE)) ) # Delete duplicate columns if group vars are expanded if (any(dots_names %in% out$groups)) { group_vars <- out$groups expanded_group_vars <- dots_names[dots_names %in% group_vars] out <- step_subset( out, groups = character(), j = expr(!!expanded_group_vars := NULL) ) out <- group_by(out, !!!syms(group_vars)) } out } # exported onLoad expand.data.table <- function(data, ..., .name_repair = "check_unique") { data <- lazy_dt(data) tidyr::expand(data, ..., .name_repair = .name_repair) } dtplyr/R/step-subset-select.R0000644000176200001440000000406314126601265015655 0ustar liggesusers #' Subset columns using their names #' #' This is a method for the dplyr [select()] generic. It is translated to #' the `j` argument of `[.data.table`. #' #' @param .data A [lazy_dt()]. 
#' @inheritParams dplyr::select #' @importFrom dplyr select #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(data.frame(x1 = 1, x2 = 2, y1 = 3, y2 = 4)) #' #' dt %>% select(starts_with("x")) #' dt %>% select(ends_with("2")) #' dt %>% select(z1 = x1, z2 = x2) select.dtplyr_step <- function(.data, ...) { sim_data <- simulate_vars(.data) locs <- tidyselect::eval_select(expr(c(...)), sim_data) locs <- ensure_group_vars(locs, .data$vars, .data$groups) vars <- set_names(.data$vars[locs], names(locs)) if (length(vars) == 0) { j <- 0L groups <- .data$groups } else { groups <- rename_groups(.data$groups, vars) vars <- simplify_names(vars) is_unnamed <- all(!have_name(vars)) if (is_unnamed && identical(unname(vars), .data$vars)) { return(.data) } j <- call2(".", !!!syms(vars)) } out <- step_subset_j(.data, vars = names(locs), groups = character(), j = j) step_group(out, groups) } #' @export select.data.table <- function(.data, ...) { .data <- lazy_dt(.data) select(.data, ...) } simulate_vars <- function(x, drop_groups = FALSE) { if (drop_groups) { vars <- setdiff(x$vars, x$groups) } else { vars <- x$vars } as_tibble(rep_named(vars, list(logical())), .name_repair = "minimal") } ensure_group_vars <- function(loc, names, groups) { group_loc <- match(groups, names) missing <- setdiff(group_loc, loc) if (length(missing) > 0) { vars <- names[missing] inform(paste0( "Adding missing grouping variables: ", paste0("`", names[missing], "`", collapse = ", ") )) loc <- c(set_names(missing, vars), loc) } loc } rename_groups <- function(groups, vars) { old2new <- set_names(names(vars), vars) groups[groups %in% names(old2new)] <- old2new[groups] groups } simplify_names <- function(vars) { names(vars)[vars == names(vars)] <- "" vars } dtplyr/R/step-call.R0000644000176200001440000001416414066665321014017 0ustar liggesusersstep_call <- function(parent, fun, args = list(), vars = parent$vars, in_place = FALSE) { stopifnot(is_step(parent)) stopifnot(is.character(fun)) stopifnot(is.list(args)) new_step( parent = parent, vars = vars, groups = parent$groups, implicit_copy = !in_place, needs_copy = in_place || parent$needs_copy, fun = fun, args = args, class = "dtplyr_step_call" ) } dt_call.dtplyr_step_call <- function(x, needs_copy = x$needs_copy) { call2(x$fun, dt_call(x$parent, needs_copy), !!!x$args) } # dplyr verbs ------------------------------------------------------------- #' Subset first or last rows #' #' These are methods for the base generics [head()] and [tail()]. They #' are not translated. #' #' @param x A [lazy_dt()] #' @param n Number of rows to select. Can use a negative number to instead #' drop rows from the other end. #' @param ... Passed on to [head()]/[tail()]. #' @importFrom utils head #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' dt <- lazy_dt(data.frame(x = 1:10)) #' #' # first three rows #' head(dt, 3) #' # last three rows #' tail(dt, 3) #' #' # drop first three rows #' tail(dt, -3) head.dtplyr_step <- function(x, n = 6L, ...) { step_call(x, "head", args = list(n = n)) } #' @importFrom utils tail #' @export #' @rdname head.dtplyr_step tail.dtplyr_step <- function(x, n = 6L, ...) { step_call(x, "tail", args = list(n = n)) } #' Rename columns using their names #' #' These are methods for the dplyr generics [rename()] and [rename_with()]. #' They are both translated to [data.table::setnames()]. 
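#' #' As a rough illustration, `rename(dt, new_x = x)` translates to something #' like `setnames(copy(DT), "x", "new_x")`; the `copy()` is dropped when an #' earlier step in the pipeline already produces an implicit copy.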
#' #' @param .data A [lazy_dt()] #' @inheritParams dplyr::rename #' @importFrom dplyr rename #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3)) #' dt %>% rename(new_x = x, new_y = y) #' dt %>% rename_with(toupper) rename.dtplyr_step <- function(.data, ...) { sim_data <- simulate_vars(.data) locs <- tidyselect::eval_rename(expr(c(...)), sim_data) step_setnames(.data, .data$vars[locs], names(locs), in_place = TRUE, rename_groups = TRUE) } #' @export rename.data.table <- function(.data, ...) { .data <- lazy_dt(.data) rename(.data, ...) } #' @importFrom dplyr rename_with #' @importFrom tidyselect everything #' @rdname rename.dtplyr_step #' @export rename_with.dtplyr_step <- function(.data, .fn, .cols = everything(), ...) { if (!missing(...)) { abort("`dtplyr::rename_with() doesn't support ...") } fn_expr <- enexpr(.fn) if (is_symbol(fn_expr)) { fn <- fn_expr } else if (is_string(fn_expr)) { fn <- sym(fn_expr) } else if (is_call(fn_expr, "~")) { env <- caller_env() call <- dt_squash_formula( fn_expr, env = env, data = .data, j = FALSE, replace = quote(x) ) fn <- new_function(exprs(x =), call, env) } else { abort("`.fn` must be a function name or formula") } # Still have to compute the new variable names for the table metadata # But this should be fast, so doing it twice shouldn't matter .fn <- as_function(.fn) sim_data <- simulate_vars(.data) locs <- unname(tidyselect::eval_select(enquo(.cols), sim_data)) old_vars <- .data$vars[locs] new_vars <- .fn(old_vars) vars <- .data$vars vars[locs] <- new_vars if (identical(locs, seq_along(sim_data))) { out <- step_call(.data, "setnames", args = list(fn), vars = vars, in_place = TRUE ) } else { out <- step_call(.data, "setnames", args = list(old_vars, fn), vars = vars, in_place = TRUE ) } groups <- rename_groups(.data$groups, set_names(new_vars, old_vars)) step_group(out, groups) } #' @export rename_with.data.table <- function(.data, .fn, .cols = everything(), ...) { .data <- lazy_dt(.data) rename_with(.data, .fn = .fn, .cols = {{.cols}}, ...) } #' Subset distinct/unique rows #' #' This is a method for the dplyr [distinct()] generic. It is translated to #' [data.table::unique.data.table()]. #' #' @importFrom dplyr distinct #' @param .data A [lazy_dt()] #' @inheritParams dplyr::distinct #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' df <- lazy_dt(data.frame( #' x = sample(10, 100, replace = TRUE), #' y = sample(10, 100, replace = TRUE) #' )) #' #' df %>% distinct(x) #' df %>% distinct(x, y) #' df %>% distinct(x, .keep_all = TRUE) distinct.dtplyr_step <- function(.data, ..., .keep_all = FALSE) { dots <- capture_dots(.data, ...) if (length(dots) > 0) { only_syms <- all(vapply(dots, is_symbol, logical(1))) if (.keep_all) { if (only_syms) { by <- union(.data$groups, names(dots)) } else { .data <- mutate(.data, !!!dots) by <- names(.data$new_vars) } } else { if (only_syms) { .data <- select(.data, !!!dots) } else { .data <- transmute(.data, !!!dots) } by <- NULL } } else { by <- NULL } args <- list() args$by <- by step_call(.data, "unique", args = args) } #' @export distinct.data.table <- function(.data, ...) { .data <- lazy_dt(.data) distinct(.data, ...) } #' @export unique.dtplyr_step <- function(x, incomparables = FALSE, ...) 
{ if (!missing(incomparables)) { abort("`incomparables` not supported by `unique.dtplyr_step()`") } distinct(x) } # tidyr verbs ------------------------------------------------------------- #' Drop rows containing missing values #' #' @description #' This is a method for the tidyr `drop_na()` generic. It is translated to #' `data.table::na.omit()` #' #' @param data A [lazy_dt()]. #' @inheritParams tidyr::drop_na #' @examples #' library(dplyr) #' library(tidyr) #' #' dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b"))) #' dt %>% drop_na() #' dt %>% drop_na(x) #' #' vars <- "y" #' dt %>% drop_na(x, any_of(vars)) # exported onLoad drop_na.dtplyr_step <- function(data, ...) { sim_data <- simulate_vars(data) locs <- names(tidyselect::eval_select(expr(c(...)), sim_data)) args <- list() if (length(locs) > 0) { args$cols <- locs } step_call(data, "na.omit", args = args) } # exported onLoad drop_na.data.table <- function(data, ...) { data <- lazy_dt(data) tidyr::drop_na(data, ...) } dtplyr/R/step-colorder-relocate.R0000644000176200001440000000336514031070705016476 0ustar liggesusers#' Relocate variables using their names #' #' This is a method for the dplyr [relocate()] generic. It is translated to #' the `j` argument of `[.data.table`. #' #' @param .data A [lazy_dt()]. #' @inheritParams dplyr::relocate #' @importFrom dplyr relocate #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3)) #' #' dt %>% relocate(z) #' dt %>% relocate(y, .before = x) #' dt %>% relocate(y, .after = y) relocate.dtplyr_step <- function(.data, ..., .before = NULL, .after = NULL) { sim_data <- simulate_vars(.data) to_move <- tidyselect::eval_select(expr(c(...)), sim_data) if (length(to_move) == 0) { return(.data) } .before <- enquo(.before) .after <- enquo(.after) has_before <- !quo_is_null(.before) has_after <- !quo_is_null(.after) if (has_before && has_after) { abort("Must supply only one of `.before` and `.after`.") } else if (has_before) { where <- min(unname(tidyselect::eval_select(.before, sim_data))) if (!where %in% to_move) { to_move <- c(to_move, where) } } else if (has_after) { where <- max(unname(tidyselect::eval_select(.after, sim_data))) if (!where %in% to_move) { to_move <- c(where, to_move) } } else { where <- 1L if (!where %in% to_move) { to_move <- union(to_move, where) } } lhs <- setdiff(seq2(1, where - 1), to_move) rhs <- setdiff(seq2(where + 1, ncol(.data)), to_move) new_vars <- .data$vars[unique(c(lhs, to_move, rhs))] out <- step_colorder(.data, new_vars) step_group(out, .data$groups) } #' @export relocate.data.table <- function(.data, ..., .before = NULL, .after = NULL) { .data <- lazy_dt(.data) relocate(.data, ..., .before = {{ .before }}, .after = {{ .after }}) } dtplyr/R/step-call-pivot_wider.R0000644000176200001440000001431114066665321016342 0ustar liggesusers#' Pivot data from long to wide #' #' @description #' This is a method for the tidyr `pivot_wider()` generic. It is translated to #' [data.table::dcast()] #' #' @param data A [lazy_dt()]. #' @inheritParams tidyr::pivot_wider #' @param values_fn A function, the default is `length()`. Note this is different #' behavior than `tidyr::pivot_wider()`, which returns a list column by default. 
#' @examples #' library(tidyr) #' #' fish_encounters_dt <- lazy_dt(fish_encounters) #' fish_encounters_dt #' fish_encounters_dt %>% #' pivot_wider(names_from = station, values_from = seen) #' # Fill in missing values #' fish_encounters_dt %>% #' pivot_wider(names_from = station, values_from = seen, values_fill = 0) #' #' # Generate column names from multiple variables #' us_rent_income_dt <- lazy_dt(us_rent_income) #' us_rent_income_dt #' us_rent_income_dt %>% #' pivot_wider(names_from = variable, values_from = c(estimate, moe)) #' #' # When there are multiple `names_from` or `values_from`, you can use #' # use `names_sep` or `names_glue` to control the output variable names #' us_rent_income_dt %>% #' pivot_wider( #' names_from = variable, #' names_sep = ".", #' values_from = c(estimate, moe) #' ) #' #' # Can perform aggregation with values_fn #' warpbreaks_dt <- lazy_dt(as_tibble(warpbreaks[c("wool", "tension", "breaks")])) #' warpbreaks_dt #' warpbreaks_dt %>% #' pivot_wider( #' names_from = wool, #' values_from = breaks, #' values_fn = mean #' ) # exported onLoad pivot_wider.dtplyr_step <- function(data, id_cols = NULL, names_from = name, names_prefix = "", names_sep = "_", names_glue = NULL, names_sort = FALSE, names_repair = "check_unique", values_from = value, values_fill = NULL, values_fn = NULL, ...) { sim_data <- simulate_vars(data) names_from <- names(tidyselect::eval_select(enquo(names_from), sim_data)) values_from <- names(tidyselect::eval_select(enquo(values_from), sim_data)) id_cols <- enquo(id_cols) if (quo_is_null(id_cols)) { sim_vars <- names(sim_data) id_cols <- sim_vars[!sim_vars %in% c(names_from, values_from)] } else { id_cols <- names(tidyselect::eval_select(id_cols, sim_data)) } if (length(names_from) > 1) { new_vars <- mutate(shallow_dt(data), .names_from = paste(!!!syms(names_from), sep = names_sep)) new_vars <- unique(pull(new_vars, .names_from)) } else { new_vars <- unique(pull(data, !!sym(names_from))) new_vars <- as.character(new_vars) } if (!is.null(names_glue)) { glue_df <- as.data.table(distinct(ungroup(data), !!!syms(names_from))) glue_df <- vctrs::vec_rep(glue_df, length(values_from)) glue_df$.value <- vctrs::vec_rep_each(values_from, length(new_vars)) glue_vars <- as.character(glue::glue_data(glue_df, names_glue)) } if (length(values_from) > 1) { new_vars <- lapply(values_from, function(.x) paste(.x, new_vars, sep = names_sep)) new_vars <- unlist(new_vars) } no_id <- length(id_cols) == 0 if (no_id) { lhs <- "..." # using symbol causes dcast() to fail new_vars <- c(".", new_vars) } else { lhs <- call_reduce(syms(id_cols), "+") } rhs <- call_reduce(syms(names_from), "+") vars <- c(id_cols, new_vars) args <- list( formula = call2("~", lhs, rhs), value.var = values_from, fun.aggregate = values_fn, sep = names_sep, fill = values_fill ) # Clean up call args if defaults are used args <- args[!vapply(args, is.null, lgl(1))] if (names_sep == "_") { args$sep <- NULL } out <- step_call(data, "dcast", args = args, vars = vars) if (no_id && names_sort) { new_vars <- new_vars[new_vars != "."] cols_sorted <- sort(new_vars) out <- select(out, !!!syms(cols_sorted)) } else if (no_id) { new_vars <- new_vars[new_vars != "."] out <- select(out, -.) 
} if (!is.null(names_glue)) { out <- step_setnames(out, new_vars, glue_vars, in_place = FALSE) # In case of names_sort = TRUE new_vars <- glue_vars } else if (nchar(names_prefix) > 0) { new_names <- paste0(names_prefix, new_vars) out <- step_setnames(out, new_vars, new_names, in_place = FALSE) # In case of names_sort = TRUE new_vars <- new_names } if (names_sort && !no_id) { cols_sorted <- c(id_cols, sort(new_vars)) out <- step_colorder(out, cols_sorted) } out <- step_repair(out, repair = names_repair) out } globalVariables(c(".", ".names_from", "name", "value", "pivot_wider")) # exported onLoad pivot_wider.data.table <- function(data, id_cols = NULL, names_from = name, names_prefix = "", names_sep = "_", names_glue = NULL, names_sort = FALSE, names_repair = "check_unique", values_from = value, values_fill = NULL, values_fn = NULL, ...) { data <- lazy_dt(data) pivot_wider( data, id_cols = {{ id_cols }}, names_from = {{ names_from }}, names_prefix = names_prefix, names_sep = names_sep, names_glue = names_glue, names_sort = names_sort, names_repair = names_repair, values_from = {{ values_from }}, values_fn = values_fn ) } step_repair <- function(data, repair = "check_unique", in_place = TRUE) { sim_data <- simulate_vars(data) data_names <- names(sim_data) repaired_names <- vctrs::vec_as_names(data_names, repair = repair) if (any(data_names != repaired_names)) { data <- step_setnames(data, seq_along(data_names), repaired_names, in_place = in_place) } data } shallow_dt <- function(x) { filter(x, TRUE) } call_reduce <- function(x, fun) { Reduce(function(x, y) call2(fun, x, y), x) } dtplyr/R/step.R0000644000176200001440000001372714126601265013104 0ustar liggesusers# We use a hybrid approach where most of the computation is done on # construction. This avoids the deeply recursive approach of dbplyr, which # should improve performance because we're not repeatedly re-computing the # same values. # # dt_call() is managed separately because it involves much more code (which # which dilute the intent of the constructor), and should only be called # relatively few times. new_step <- function(parent, vars = parent$vars, groups = parent$groups, locals = parent$locals, implicit_copy = parent$implicit_copy, needs_copy = parent$needs_copy, env = parent$env, ..., class = character()) { stopifnot(is.data.table(parent) || is_step(parent)) stopifnot(is.character(vars)) stopifnot(is.character(groups)) structure( list( parent = parent, vars = vars, groups = groups, locals = locals, implicit_copy = implicit_copy, needs_copy = needs_copy, env = env, ... ), class = c(class, "dtplyr_step") ) } #' @export dim.dtplyr_step <- function(x) { c(NA, length(x$vars)) } #' @importFrom dplyr tbl_vars #' @export tbl_vars.dtplyr_step <- function(x) { x$vars } #' @importFrom dplyr groups #' @export groups.dtplyr_step <- function(x) { syms(x$groups) } #' @importFrom dplyr group_vars #' @export group_vars.dtplyr_step <- function(x) { x$groups } #' @importFrom dplyr group_size #' @export group_size.dtplyr_step <- function(x) { collect(summarise(x, n = .N))$n } #' @importFrom dplyr n_groups #' @export n_groups.dtplyr_step <- function(x) { length(group_size(x)) } #' Force computation of a lazy data.table #' #' * `collect()` returns a tibble, grouped if needed. #' * `compute()` generates an intermediate assignment in the translation. #' * `as.data.table()` returns a data.table. #' * `as.data.frame()` returns a data frame. #' * `as_tibble()` returns a tibble. #' #' @export #' @param x A [lazy_dt] #' @param ... 
Arguments used by other methods. #' @importFrom dplyr collect #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(mtcars) #' #' # Generate translation #' avg_mpg <- dt %>% #' filter(am == 1) %>% #' group_by(cyl) %>% #' summarise(mpg = mean(mpg)) #' #' # Show translation and temporarily compute result #' avg_mpg #' #' # compute and return tibble #' avg_mpg_tb <- as_tibble(avg_mpg) #' avg_mpg_tb #' #' # compute and return data.table #' avg_mpg_dt <- data.table::as.data.table(avg_mpg) #' avg_mpg_dt #' #' # modify translation to use intermediate assignment #' compute(avg_mpg) #' collect.dtplyr_step <- function(x, ...) { # for consistency with dbplyr::collect() out <- as_tibble(x) if (length(x$groups) > 0) { out <- group_by(out, !!!syms(x$groups)) } out } #' @rdname collect.dtplyr_step #' @param name Name of intermediate data.table. #' @export #' @importFrom dplyr compute compute.dtplyr_step <- function(x, name = unique_name(), ...) { if (!dt_has_computation(x)) { return(x) } step_locals(x, set_names(list(dt_call(x)), name), name) } #' @rdname collect.dtplyr_step #' @export #' @param keep.rownames Ignored as dplyr never preserves rownames. as.data.table.dtplyr_step <- function(x, keep.rownames = FALSE, ...) { dt_eval(x)[] } #' @rdname collect.dtplyr_step #' @export as.data.frame.dtplyr_step <- function(x, ...) { as.data.frame(dt_eval(x)) } #' @rdname collect.dtplyr_step #' @export #' @importFrom tibble as_tibble #' @param .name_repair Treatment of problematic column names as_tibble.dtplyr_step <- function(x, ..., .name_repair = "check_unique") { out <- as_tibble(dt_eval(x), .name_repair = .name_repair) attr(out, ".internal.selfref") <- NULL attr(out, "sorted") <- NULL out } #' @export #' @importFrom dplyr pull pull.dtplyr_step <- function(.data, var = -1, name = NULL) { var <- sym(tidyselect::vars_pull(.data$vars, !!enquo(var))) .data <- ungroup(.data) name <- enquo(name) if (quo_is_null(name)) { .data <- select(.data, !! var) .data <- collect(.data) .data[[1]] } else { name <- sym(tidyselect::vars_pull(.data$vars, !!name)) .data <- select(.data, !! var, !! name) .data <- collect(.data) set_names(.data[[1]], .data[[2]]) } } #' @export print.dtplyr_step <- function(x, ...) { dt <- as.data.table(x) cat_line(crayon::bold("Source: "), "local data table ", dplyr::dim_desc(dt)) if (length(x$groups) > 0) { cat_line(crayon::bold("Groups: "), paste(x$groups, collapse = ", ")) } if (length(x$locals) > 0) { cat_line(crayon::bold("Call:")) for (var in names(x$locals)) { cat_line(" ", var, " <- ", expr_deparse(x$locals[[var]])) } cat_line(" ", expr_text(dt_call(x))) } else { cat_line(crayon::bold("Call: "), expr_text(dt_call(x))) } cat_line() cat_line(format(as_tibble(dt, .name_repair = "minimal"), n = 6)[-1]) # Hack to remove "A tibble" line cat_line() cat_line(crayon::silver( "# Use as.data.table()/as.data.frame()/as_tibble() to access results" )) invisible(x) } #' @importFrom dplyr glimpse #' @export glimpse.dtplyr_step <- function(x, width = NULL, ...) { glimpse(collect(x), width = width, ...) } #' @importFrom dplyr show_query #' @export show_query.dtplyr_step <- function(x) { dt_call(x) } is_step <- function(x) inherits(x, "dtplyr_step") # Returns a named list of data.tables: most just dispatch to their # parent. The only exceptions are dt_step_first() and the two-table verbs. 
dt_sources <- function(x) { UseMethod("dt_sources") } #' @export dt_sources.dtplyr_step <- function(x) { dt_sources(x$parent) } dt_call <- function(x, needs_copy = x$needs_copy) { UseMethod("dt_call") } #' @export dt_call.dtplyr_step <- function(x, needs_copy = x$needs_copy) { dt_call(x$parent, needs_copy) } dt_has_computation <- function(x) { UseMethod("dt_has_computation") } #' @export dt_has_computation.dtplyr_step <- function(x, needs_copy = x$needs_copy) { TRUE } dtplyr/R/step-first.R0000644000176200001440000001027714021443044014217 0ustar liggesusers#' Create a "lazy" data.table for use with dplyr verbs #' #' @description #' A lazy data.table lazy captures the intent of dplyr verbs, only actually #' performing computation when requested (with [collect()], [pull()], #' [as.data.frame()], [data.table::as.data.table()], or [tibble::as_tibble()]). #' This allows dtplyr to convert dplyr verbs into as few data.table expressions #' as possible, which leads to a high performance translation. #' #' See `vignette("translation")` for the details of the translation. #' #' @param x A data table (or something can can be coerced to a data table). #' @param immutable If `TRUE`, `x` is treated as immutable and will never #' be modified by any code generated by dtplyr. Alternatively, you can set #' `immutable = FALSE` to allow dtplyr to modify the input object. #' @param name Optionally, supply a name to be used in generated expressions. #' For expert use only. #' @param key_by Set keys for data frame, using [select()] semantics (e.g. #' `key_by = c(key1, key2)`. #' #' This uses [data.table::setkey()] to sort the table and build an index. #' This will considerably improve performance for subsets, summaries, and #' joins that use the keys. #' #' See `vignette("datatable-keys-fast-subset")` for more details. #' @export #' @aliases tbl_dt grouped_dt #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' # If you have a data.table, using it with any dplyr generic will #' # automatically convert it to a lazy_dt object #' dt <- data.table::data.table(x = 1:10, y = 10:1) #' dt %>% filter(x == y) #' dt %>% mutate(z = x + y) #' #' # Note that dtplyr will avoid mutating the input data.table, so the #' # previous translation includes an automatic copy(). 
You can avoid this #' # with a manual call to lazy_dt() #' dt %>% #' lazy_dt(immutable = FALSE) %>% #' mutate(z = x + y) #' #' # If you have a data frame, you can use lazy_dt() to convert it to #' # a data.table: #' mtcars2 <- lazy_dt(mtcars) #' mtcars2 #' mtcars2 %>% select(mpg:cyl) #' mtcars2 %>% select(x = mpg, y = cyl) #' mtcars2 %>% filter(cyl == 4) %>% select(mpg) #' mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4) #' mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2) #' mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2) #' mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2) #' #' # Learn more about translation in vignette("translation") #' by_cyl <- mtcars2 %>% group_by(cyl) #' by_cyl %>% summarise(mpg = mean(mpg)) #' by_cyl %>% mutate(mpg = mean(mpg)) #' by_cyl %>% #' filter(mpg < mean(mpg)) %>% #' summarise(hp = mean(hp)) lazy_dt <- function(x, name = NULL, immutable = TRUE, key_by = NULL) { # in case `x` has an `as.data.table()` method but not a `group_vars()` method groups <- tryCatch(group_vars(x), error = function(e) character()) if (!is.data.table(x)) { if (!immutable) { abort("`immutable` must be `TRUE` when `x` is not already a data table.") } x <- as.data.table(x) copied <- TRUE } else { copied <- FALSE } key_by <- enquo(key_by) key_vars <- unname(tidyselect::vars_select(names(x), !!key_by)) if (length(key_vars)) { if (immutable && !copied) { x <- data.table::copy(x) } data.table::setkeyv(x, key_vars) } step_first(x, name = name, groups = groups, immutable = immutable, env = caller_env()) } #' @export dim.dtplyr_step_first <- function(x) { dim(x$parent) } step_first <- function(parent, name = NULL, groups = character(), immutable = TRUE, env = caller_env()) { stopifnot(is.data.table(parent)) if (is.null(name)) { name <- unique_name() } new_step(parent, vars = names(parent), groups = groups, locals = list(), implicit_copy = !immutable, needs_copy = FALSE, name = sym(name), env = env, class = "dtplyr_step_first" ) } #' @export dt_call.dtplyr_step_first <- function(x, needs_copy = FALSE) { if (needs_copy) { expr(copy(!!x$name)) } else { x$name } } #' @export dt_sources.dtplyr_step_first <- function(x) { stats::setNames(list(x$parent), as.character(x$name)) } #' @export dt_has_computation.dtplyr_step_first <- function(x) { FALSE } unique_name <- local({ i <- 0 function() { i <<- i + 1 paste0("_DT", i) } }) dtplyr/R/step-subset-do.R0000644000176200001440000000147514006775763015020 0ustar liggesusers#' @importFrom dplyr do #' @export do.dtplyr_step <- function(.data, ...) { # This is a partial implementation, because I don't think that many # people are likely to use it, given that do() is marked as questioning # Problems: # * doesn't handle unnamed case # * doesn't set .SDcols so `.SD` will only refer to non-groups # * can duplicating group vars (#5) dots <- capture_dots(.data, ...) if (any(names2(dots) == "")) { # I can't see any way to figure out what the variables are abort("Unnamed do() not supported by dtplyr") } new_vars <- lapply(dots, function(x) call2(".", x)) j <- call2(".", !!!new_vars) vars <- union(.data$vars, names(dots)) step_subset_j(.data, vars = vars, j = j) } #' @export do.data.table <- function(.data, ...) { .data <- lazy_dt(.data) do(.data, ...) 
} dtplyr/R/step-assign.R0000644000176200001440000000064714006775461014372 0ustar liggesusersstep_locals <- function(parent, locals, name) { stopifnot(is_step(parent)) stopifnot(is.list(locals)) stopifnot(is_string(name)) new_step( parent = parent, locals = utils::modifyList(parent$locals, locals), implicit_copy = TRUE, needs_copy = FALSE, name = name, class = "dtplyr_step_assign", ) } #' @export dt_call.dtplyr_step_assign <- function(x, needs_copy = FALSE) { sym(x$name) } dtplyr/R/step-colorder.R0000644000176200001440000000173114126601265014703 0ustar liggesusersstep_colorder <- function(x, col_order) { stopifnot(is_step(x)) stopifnot(is.character(col_order) || is.integer(col_order)) if (any(duplicated(col_order))) { abort("Every element of `col_order` must be unique.") } if (is.integer(col_order)) { if (identical(col_order, seq_along(col_order))) { return(x) } vars <- x$vars[col_order] } else { vars_selected <- x$vars[x$vars %in% col_order] vars_count <- vctrs::vec_count(vars_selected) vars_problematic <- vars_count$key[vars_count$count != 1] if (!is_empty(vars_problematic)) { vars_error <- paste0(vars_problematic, collapse = ", ") msg <- paste0("The column(s) ", vars_error, " do not uniquely match a column in `x`.") abort(msg) } if (identical(col_order, x$vars[seq_along(col_order)])) { return(x) } vars <- col_order } step_call(x, "setcolorder", args = list(col_order), vars = vars, in_place = !x$implicit_copy ) } dtplyr/R/step-nest.R0000644000176200001440000000457614126602467014062 0ustar liggesusers#' Nest #' #' @description #' This is a method for the tidyr [tidyr::nest()] generic. It is translated #' using the non-nested variables in the `by` argument and `.SD` in the `j` #' argument. #' #' @inheritParams tidyr::nest #' @param ... <[`tidy-select`][tidyr::tidyr_tidy_select]> Columns to nest, specified #' using name-variable pairs of the form `new_col = c(col1, col2, col3)`. #' The right hand side can be any valid tidy select expression. #' @param .key Not supported. #' @param data A [lazy_dt()]. #' @examples #' if (require("tidyr", quietly = TRUE)) { #' dt <- lazy_dt(tibble(x = c(1, 2, 1), y = c("a", "a", "b"))) #' dt %>% nest(data = y) #' #' dt %>% dplyr::group_by(x) %>% nest() #' } # exported onLoad nest.dtplyr_step <- function(.data, ..., .names_sep = NULL, .key = deprecated()) { if (lifecycle::is_present(.key)) { abort(c( "`nest()` for lazy data.tables doesn't support the `.key` argument.", i = "Use a name in the `...` argument instead." )) } cols <- eval_nest_dots(.data, ...) cols <- lapply(cols, set_names) if (!is.null(.names_sep)) { cols <- imap(cols, strip_names, .names_sep) } if (length(cols) == 1 && is.null(.names_sep)) { # use `.SD` as it is shorter and faster nm <- names(cols) j_exprs <- exprs(!!nm := .(.SD)) } else { j_exprs <- imap( cols, function(x, name) { x <- simplify_names(x) expr(.(data.table(!!!syms(x)))) } ) } asis <- setdiff(.data$vars, unlist(cols)) out <- step_subset_j( .data, vars = c(asis, names(cols)), j = expr(.(!!!j_exprs)), groups = asis, arrange = FALSE ) groups <- intersect(out$vars, group_vars(.data)) group_by(out, !!!syms(groups)) } # exported onLoad nest.data.table <- function(.data, ..., .names_sep = NULL, .key = deprecated()) { .data <- lazy_dt(.data) tidyr::nest(.data, ..., .names_sep = .names_sep, .key = .key) } eval_nest_dots <- function(.data, ...) { if (missing(...)) { groups <- group_vars(.data) if (is_empty(groups)) { warn(paste0( "`...` must not be empty for ungrouped data frames.\n", "Did you want `data = everything()`?" 
)) } nest_vars <- setdiff(.data$vars, groups) list(data = nest_vars) } else { cols <- enquos(...) sim_data <- simulate_vars(.data) lapply(cols, function(.x) names(tidyselect::eval_select(.x, sim_data))) } } dtplyr/R/step-subset-slice.R0000644000176200001440000002232414150760302015470 0ustar liggesusers #' Subset rows using their positions #' #' @description #' These are methods for the dplyr [slice()], `slice_head()`, `slice_tail()`, #' `slice_min()`, `slice_max()` and `slice_sample()` generics. They are #' translated to the `i` argument of `[.data.table`. #' #' Unlike dplyr, `slice()` (and `slice()` alone) returns the same number of #' rows per group, regardless of whether or not the indices appear in each #' group. #' #' @importFrom dplyr slice #' @param .data A [lazy_dt()]. #' @param n,prop Provide either `n`, the number of rows, or `prop`, the #' proportion of rows to select. If neither are supplied, `n = 1` will be #' used. #' #' If a negative value of `n` or `prop` is provided, the specified number or #' proportion of rows will be removed. #' #' If `n` is greater than the number of rows in the group (or `prop > 1`), #' the result will be silently truncated to the group size. If the #' `prop`ortion of a group size does not yield an integer number of rows, the #' absolute value of `prop*n()` is rounded down. #' @param ... Positive integers giving rows to select, or negative #' integers giving rows to drop. #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(mtcars) #' dt %>% slice(1, 5, 10) #' dt %>% slice(-(1:4)) #' #' # First and last rows based on existing order #' dt %>% slice_head(n = 5) #' dt %>% slice_tail(n = 5) #' #' # Rows with minimum and maximum values of a variable #' dt %>% slice_min(mpg, n = 5) #' dt %>% slice_max(mpg, n = 5) #' #' # slice_min() and slice_max() may return more rows than requested #' # in the presence of ties. Use with_ties = FALSE to suppress #' dt %>% slice_min(cyl, n = 1) #' dt %>% slice_min(cyl, n = 1, with_ties = FALSE) #' #' # slice_sample() allows you to random select with or without replacement #' dt %>% slice_sample(n = 5) #' dt %>% slice_sample(n = 5, replace = TRUE) #' #' # you can optionally weight by a variable - this code weights by the #' # physical weight of the cars, so heavy cars are more likely to get #' # selected #' dt %>% slice_sample(weight_by = wt, n = 5) slice.dtplyr_step <- function(.data, ...) { dots <- capture_dots(.data, ..., .j = FALSE) if (length(dots) == 0) { i <- NULL } else { if (length(dots) == 1) { .rows <- dots[[1]] } else { .rows <- call2("c", !!!dots) } # Update logic once data.table #4353 is merged # https://github.com/Rdatatable/data.table/pull/4353 between <- call2("between", .rows, quote(-.N), quote(.N)) i <- call2("[", .rows, between) } step_subset_i(.data, i) } #' @export slice.data.table <- function(.data, ...) { .data <- lazy_dt(.data) slice(.data, ...) 
} #' @rdname slice.dtplyr_step #' @importFrom dplyr slice_head #' @inheritParams dplyr::slice #' @export slice_head.dtplyr_step <- function(.data, ..., n, prop) { ellipsis::check_dots_empty() size <- get_slice_size(n, prop, "slice_head") i <- expr(rlang::seq2(1L, !!size)) step_subset_i(.data, i = i) } #' @export slice_head.data.table <- function(.data, ..., n, prop) { .data <- lazy_dt(.data) slice_head(.data, ..., n = n, prop = prop) } #' @rdname slice.dtplyr_step #' @importFrom dplyr slice_tail #' @export slice_tail.dtplyr_step <- function(.data, ..., n, prop) { ellipsis::check_dots_empty() size <- get_slice_size(n, prop, "slice_tail") i <- expr(rlang::seq2(.N - !!size + 1L, .N)) step_subset_i(.data, i = i) } #' @export slice_tail.data.table <- function(.data, ..., n, prop) { .data <- lazy_dt(.data) slice_tail(.data, ..., n = n, prop = prop) } #' @rdname slice.dtplyr_step #' @importFrom dplyr slice_min #' @inheritParams dplyr::slice #' @export slice_min.dtplyr_step <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { if (missing(order_by)) { abort("argument `order_by` is missing, with no default.") } slice_min_max( .data, order_by = {{ order_by }}, decreasing = FALSE, ..., n = n, prop = prop, with_ties = with_ties, .slice_fn = "slice_min" ) } #' @export slice_min.data.table <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { .data <- lazy_dt(.data) slice_min(.data, {{ order_by }}, ..., n = n, prop = prop, with_ties = with_ties) } #' @rdname slice.dtplyr_step #' @importFrom dplyr slice_max #' @export slice_max.dtplyr_step <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { if (missing(order_by)) { abort("argument `order_by` is missing, with no default.") } slice_min_max( .data, order_by = {{ order_by }}, decreasing = TRUE, ..., n = n, prop = prop, with_ties = with_ties, .slice_fn = "slice_max" ) } #' @export slice_max.data.table <- function(.data, order_by, ..., n, prop, with_ties = TRUE) { .data <- lazy_dt(.data) slice_max(.data, {{ order_by }}, ..., n = n, prop = prop, with_ties = with_ties) } slice_min_max <- function(.data, order_by, decreasing, ..., n, prop, with_ties = TRUE, .slice_fn = "slice_min_max") { ellipsis::check_dots_empty() size <- get_slice_size(n, prop, .slice_fn) order_by <- capture_dot(.data, {{ order_by }}, j = FALSE) if (decreasing) { order_by <- expr(desc(!!order_by)) } if (with_ties) { ties.method <- "min" } else { ties.method <- "first" } i <- expr(!!smaller_ranks(!!order_by, !!size, ties.method = ties.method)) out <- step_subset_i(.data, i) arrange(out, !!order_by, .by_group = TRUE) } smaller_ranks <- function(x, y, ties.method = "min") { x <- enexpr(x) y <- enexpr(y) # `frank()` by group is much slower than rank # https://github.com/Rdatatable/data.table/issues/3988 # also https://github.com/Rdatatable/data.table/issues/4284 expr(rank(!!x, ties.method = !!ties.method, na.last = "keep") <= !!y) } #' @importFrom dplyr slice_sample #' @inheritParams dplyr::slice #' @export slice_sample.dtplyr_step <- function(.data, ..., n, prop, weight_by = NULL, replace = FALSE) { size <- get_slice_size(n, prop, "slice_sample") ellipsis::check_dots_empty() wt <- enexpr(weight_by) i <- sample_int(.N, !!size, replace = replace, wt = wt) step_subset_i(.data, i) } #' @export slice_sample.data.table <- function(.data, ..., n, prop, weight_by = NULL, replace = FALSE) { .data <- lazy_dt(.data) slice_sample(.data, ..., n = n, prop = prop, weight_by = !!enexpr(weight_by), replace = replace) } sample_int <- function(n, size, replace = FALSE, wt = NULL) { n <- 
enexpr(n) size <- enexpr(size) if (replace) { out <- expr(sample.int(!!n, !!size, replace = TRUE)) } else { out <- expr(sample.int(!!n, min(!!size, !!n))) } if (!is.null(wt)) { out$prob <- wt } out } # sample_ ----------------------------------------------------------------- #' @importFrom dplyr sample_n #' @export sample_n.dtplyr_step <- function(tbl, size, replace = FALSE, weight = NULL ) { weight <- enexpr(weight) step_subset_i(tbl, i = sample_call(size, replace, weight)) } #' @export sample_n.data.table <- function(.data, ...) { .data <- lazy_dt(.data) sample_n(.data, ...) } #' @importFrom dplyr sample_frac #' @export sample_frac.dtplyr_step <- function(tbl, size = 1, replace = FALSE, weight = NULL ) { weight <- enexpr(weight) step_subset_i(tbl, i = sample_call(expr(.N * !!size), replace, weight)) } #' @export sample_frac.data.table <- function(.data, ...) { .data <- lazy_dt(.data) sample_frac(.data, ...) } # helpers ----------------------------------------------------------------- check_constant <- function(x, name, fn) { withCallingHandlers(force(x), error = function(e) { abort(c( glue("`{name}` must be a constant in `{fn}()`."), x = conditionMessage(e) ), parent = e) }) } check_slice_size <- function(n, prop, .slice_fn = "check_slice_size") { if (missing(n) && missing(prop)) { list(type = "n", n = 1L) } else if (!missing(n) && missing(prop)) { n <- check_constant(n, "n", .slice_fn) if (!is.numeric(n) || length(n) != 1 || is.na(n)) { abort("`n` must be a single number.") } list(type = "n", n = as.integer(n)) } else if (!missing(prop) && missing(n)) { prop <- check_constant(prop, "prop", .slice_fn) if (!is.numeric(prop) || length(prop) != 1 || is.na(prop)) { abort("`prop` must be a single number.") } list(type = "prop", prop = prop) } else { abort("Must supply exactly one of `n` and `prop` arguments.") } } get_slice_size <- function(n, prop, .slice_fn = "get_slice_size") { slice_input <- check_slice_size(n, prop, .slice_fn) if (slice_input$type == "n") { if (slice_input$n < 0) { expr(max(.N + !!slice_input$n, 0L)) } else { expr(min(!!slice_input$n, .N)) } } else if (slice_input$type == "prop") { if (slice_input$prop < 0) { expr(max(.N + as.integer(!!slice_input$prop * .N), 0L)) } else { expr(min(as.integer(!!slice_input$prop * .N), .N)) } } } sample_call <- function(size, replace = FALSE, weight = NULL) { call <- expr(sample(.N, !!size)) if (replace) { call$replace <- TRUE } call$prob <- weight call } dtplyr/R/step-subset-arrange.R0000644000176200001440000000200014126601265016002 0ustar liggesusers#' Arrange rows by column values #' #' This is a method for dplyr generic [arrange()]. It is translated to #' an [order()] call in the `i` argument of `[.data.table`. #' #' @param .data A [lazy_dt()]. 
#' @inheritParams dplyr::arrange #' @importFrom dplyr arrange #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(mtcars) #' dt %>% arrange(vs, cyl) #' dt %>% arrange(desc(vs), cyl) #' dt %>% arrange(across(mpg:disp)) arrange.dtplyr_step <- function(.data, ..., .by_group = FALSE) { dots <- capture_dots(.data, ..., .j = FALSE) if (.by_group) { dots <- c(syms(.data$groups), dots) } if (length(dots) == 0) { return(.data) } # Order without grouping then restore dots <- set_names(dots, NULL) step <- step_subset(.data, i = call2("order", !!!dots), groups = character()) step_group(step, groups = .data$groups) } #' @export arrange.data.table <- function(.data, ..., .by_group = FALSE) { .data <- lazy_dt(.data) arrange(.data, ..., .by_group = .by_group) } dtplyr/R/step-call-pivot_longer.R0000644000176200001440000002617614031070705016516 0ustar liggesusers#' Pivot data from wide to long #' #' @description #' This is a method for the tidyr `pivot_longer()` generic. It is translated to #' [data.table::melt()] #' #' @param data A [lazy_dt()]. #' @inheritParams tidyr::pivot_longer #' @param names_ptypes,names_transform,values_ptypes,values_transform #' Not currently supported by dtplyr. #' @examples #' library(tidyr) #' #' # Simplest case where column names are character data #' relig_income_dt <- lazy_dt(relig_income) #' relig_income_dt %>% #' pivot_longer(!religion, names_to = "income", values_to = "count") #' #' # Slightly more complex case where columns have common prefix, #' # and missing missings are structural so should be dropped. #' billboard_dt <- lazy_dt(billboard) #' billboard %>% #' pivot_longer( #' cols = starts_with("wk"), #' names_to = "week", #' names_prefix = "wk", #' values_to = "rank", #' values_drop_na = TRUE #' ) #' #' # Multiple variables stored in column names #' lazy_dt(who) %>% #' pivot_longer( #' cols = new_sp_m014:newrel_f65, #' names_to = c("diagnosis", "gender", "age"), #' names_pattern = "new_?(.*)_(.)(.*)", #' values_to = "count" #' ) #' #' # Multiple observations per row #' anscombe_dt <- lazy_dt(anscombe) #' anscombe_dt %>% #' pivot_longer( #' everything(), #' names_to = c(".value", "set"), #' names_pattern = "(.)(.)" #' ) # exported onLoad pivot_longer.dtplyr_step <- function(data, cols, names_to = "name", names_prefix = NULL, names_sep = NULL, names_pattern = NULL, names_ptypes = NULL, names_transform = NULL, names_repair = "check_unique", values_to = "value", values_drop_na = FALSE, values_ptypes = NULL, values_transform = NULL, ...) 
{ if (!is.null(names_ptypes)) { abort("`names_ptypes` is not supported by dtplyr") } if (!is.null(names_transform)) { abort("`names_transform` is not supported by dtplyr") } if (!is.null(values_ptypes)) { abort("`values_ptypes` is not supported by dtplyr") } if (!is.null(values_transform)) { abort("`values_transform` is not supported by dtplyr") } sim_data <- simulate_vars(data) measure_vars <- names(tidyselect::eval_select(enquo(cols), sim_data)) if (length(measure_vars) == 0) { abort("`cols` must select at least one column.") } multiple_names_to <- length(names_to) > 1 uses_dot_value <- ".value" %in% names_to variable_name <- "variable" if (uses_dot_value) { if (!is.null(names_sep)) { names_to_setup <- str_separate(measure_vars, into = names_to, sep = names_sep) } else if (!is.null(names_pattern)) { names_to_setup <- str_extract(measure_vars, into = names_to, names_pattern) } else { abort("If you use '.value' in `names_to` you must also supply `names_sep' or `names_pattern") } .value <- names_to_setup$.value v_fct <- factor(.value, levels = unique(.value)) measure_vars <- split(measure_vars, v_fct) values_to <- names(measure_vars) names(measure_vars) <- NULL if (multiple_names_to) { variable_name <- names_to[!names_to == ".value"] .value_ids <- split(names_to_setup[[variable_name]], v_fct) .value_id <- .value_ids[[1]] # Make sure data is "balanced" # https://github.com/Rdatatable/data.table/issues/2575 # The list passed to measure.vars also needs the same number of column names per element equal_ids <- vapply( .value_ids[-1], function(.x) isTRUE(all.equal(.value_id, .x)), logical(1) ) if (all(equal_ids)) { .value_id <- vctrs::vec_rep_each(.value_id, length(pull(data))) } else { abort("`data.table::melt()` doesn't currently support melting of unbalanced datasets.") } } } else if (multiple_names_to) { if (is.null(names_sep) && is.null(names_pattern)) { abort("If you supply multiple names in `names_to` you must also supply `names_sep` or `names_pattern`") } else if (!is.null(names_sep) && !is.null(names_pattern)) { abort("only one of names_sep or names_pattern should be provided") } } else { variable_name <- names_to } args <- list( measure.vars = measure_vars, variable.name = variable_name, value.name = values_to, na.rm = values_drop_na, variable.factor = FALSE ) # Clean up call args if defaults are used if (variable_name == "variable") { args$variable.name <- NULL } if (identical(values_to, "value")) { args$value.name <- NULL } if (is_false(values_drop_na)) { args$na.rm <- NULL } sim_vars <- names(sim_data) id_vars <- sim_vars[!sim_vars %in% unlist(measure_vars)] out <- step_call( data, "melt", args = args, vars = c(id_vars, variable_name, values_to) ) if (!is.null(names_prefix)) { out <- mutate(out, !!variable_name := gsub(paste0("^", names_prefix), "", !!sym(variable_name))) } if (multiple_names_to && uses_dot_value) { out <- mutate(out, !!variable_name := !!.value_id) } else if (multiple_names_to && !uses_dot_value) { if (!is.null(names_sep)) { into_cols <- str_separate(pull(out, !!sym(variable_name)), names_to, sep = names_sep) } else { into_cols <- str_extract(pull(out, !!sym(variable_name)), into = names_to, regex = names_pattern) } out <- mutate(out, !!!into_cols) # Need to drop variable_name and move names_to vars to correct position # Recreates relocate logic so only select is necessary, not relocate + select out_vars <- out$vars var_idx <- which(out_vars == variable_name) before_vars <- out_vars[seq_along(out_vars) < var_idx] after_vars <- out_vars[seq_along(out_vars) > var_idx] 
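    # before_vars + names_to + after_vars puts the new columns exactly where
    # the melted name column used to be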
out <- select(out, !!!syms(before_vars), !!!syms(names_to), !!!syms(after_vars)) } else if (!multiple_names_to && uses_dot_value) { out <- mutate(out, variable = NULL) } step_repair(out, repair = names_repair) } # exported onLoad pivot_longer.data.table <- function(data, cols, names_to = "name", names_prefix = NULL, names_sep = NULL, names_pattern = NULL, names_ptypes = NULL, names_transform = NULL, names_repair = "check_unique", values_to = "value", values_drop_na = FALSE, values_ptypes = NULL, values_transform = NULL, ...) { data <- lazy_dt(data) tidyr::pivot_longer( data = data, cols = {{ cols }}, names_to = names_to, names_prefix = names_prefix, names_sep = names_sep, names_pattern = names_pattern, names_ptypes = names_ptypes, names_transform = names_transform, names_repair = names_repair, values_to = values_to, values_drop_na = values_drop_na, values_ptypes = values_ptypes, values_transform = values_transform, ... ) } # ============================================================================== # inlined from tidyr # https://github.com/tidyverse/tidyr/issues/1103 # ============================================================================== # nocov start # str_extract() ----------------------------------------------------------------- str_extract <- function(x, into, regex, convert = FALSE) { stopifnot( is_string(regex), is_character(into) ) out <- str_match_first(x, regex) if (length(out) != length(into)) { stop( "`regex` should define ", length(into), " groups; ", ncol(out), " found.", call. = FALSE ) } # Handle duplicated names if (anyDuplicated(into)) { pieces <- split(out, into) into <- names(pieces) out <- lapply(pieces, pmap_chr, paste0, sep = "") } into <- as_utf8_character(into) non_na_into <- !is.na(into) out <- out[non_na_into] names(out) <- into[non_na_into] if (convert) { out[] <- lapply(out, utils::type.convert, as.is = TRUE) } out } str_match_first <- function(string, regex) { loc <- regexpr(regex, string, perl = TRUE) loc <- group_loc(loc) out <- lapply( seq_len(loc$matches), function(i) substr(string, loc$start[, i], loc$end[, i]) ) out[-1] } group_loc <- function(x) { start <- cbind(as.vector(x), attr(x, "capture.start")) end <- start + cbind(attr(x, "match.length"), attr(x, "capture.length")) - 1L no_match <- start == -1L start[no_match] <- NA end[no_match] <- NA list(matches = ncol(start), start = start, end = end) } # str_separate() ----------------------------------------------------------------- str_separate <- function(x, into, sep, convert = FALSE, extra = "warn", fill = "warn") { if (!is.character(into)) { abort("`into` must be a character vector") } if (is.numeric(sep)) { out <- strsep(x, sep) } else if (is_character(sep)) { out <- data.table::tstrsplit(x, sep, fixed = TRUE, names = TRUE) out <- as_tibble(out) } else { abort("`sep` must be either numeric or character") } names(out) <- as_utf8_character(into) out <- out[!is.na(names(out))] if (convert) { out[] <- lapply(out, utils::type.convert, as.is = TRUE) } out } strsep <- function(x, sep) { nchar <- nchar(x) pos <- lapply(sep, function(i) { if (i >= 0) return(i) pmax(0, nchar + i) }) pos <- c(list(0), pos, list(nchar)) lapply(1:(length(pos) - 1), function(i) { substr(x, pos[[i]] + 1, pos[[i + 1]]) }) } str_split_n <- function(x, pattern, n_max = -1) { if (is.factor(x)) { x <- as.character(x) } m <- gregexpr(pattern, x, perl = TRUE) if (n_max > 0) { m <- lapply(m, function(x) slice_match(x, seq_along(x) < n_max)) } regmatches(x, m, invert = TRUE) } slice_match <- function(x, i) { structure( x[i], 
match.length = attr(x, "match.length")[i], index.type = attr(x, "index.type"), useBytes = attr(x, "useBytes") ) } list_indices <- function(x, max = 20) { if (length(x) > max) { x <- c(x[seq_len(max)], "...") } paste(x, collapse = ", ") } # pmap()/pmap_chr() ----------------------------------------------------------------- args_recycle <- function(args) { lengths <- vapply(args, length, integer(1)) n <- max(lengths) stopifnot(all(lengths == 1L | lengths == n)) to_recycle <- lengths == 1L args[to_recycle] <- lapply(args[to_recycle], function(x) rep.int(x, n)) args } pmap <- function(.l, .f, ...) { args <- args_recycle(.l) do.call("mapply", c( FUN = list(quote(.f)), args, MoreArgs = quote(list(...)), SIMPLIFY = FALSE, USE.NAMES = FALSE )) } pmap_chr <- function(.l, .f, ...) { as.character(pmap(.l, .f, ...)) } # nocov end dtplyr/R/complete.R0000644000176200001440000000203514031070705013721 0ustar liggesusers#' Complete a data frame with missing combinations of data #' #' @description #' This is a method for the tidyr `complete()` generic. This is a wrapper #' around `dtplyr` translations for `expand()`, `full_join()`, and `replace_na()` #' that's useful for completing missing combinations of data. #' #' @param data A [lazy_dt()]. #' @inheritParams tidyr::complete #' @examples #' library(tidyr) #' tbl <- tibble(x = 1:2, y = 1:2, z = 3:4) #' dt <- lazy_dt(tbl) #' #' dt %>% #' complete(x, y) #' #' dt %>% #' complete(x, y, fill = list(z = 10L)) # exported onLoad complete.dtplyr_step <- function(data, ..., fill = list()) { dots <- enquos(...) dots <- dots[!vapply(dots, quo_is_null, logical(1))] if (length(dots) == 0) { return(data) } full <- tidyr::expand(data, !!!dots) full <- dplyr::full_join(full, data, by = full$vars) full <- tidyr::replace_na(full, replace = fill) full } # exported onLoad complete.data.table <- function(data, ..., fill = list()) { data <- lazy_dt(data) tidyr::complete(data, ..., fill = fill) } dtplyr/R/step-subset-separate.R0000644000176200001440000000610114126601265016175 0ustar liggesusers#' Separate a character column into multiple columns with a regular #' expression or numeric locations #' #' @description #' This is a method for the [tidyr::separate()] generic. It is translated to #' [data.table::tstrsplit()] in the `j` argument of `[.data.table`. #' #' @param data A [lazy_dt()]. #' @param col Column name or position. #' #' This argument is passed by expression and supports quasiquotation #' (you can unquote column names or column positions). #' @param into Names of new variables to create as character vector. #' Use `NA` to omit the variable in the output. #' @param sep Separator between columns. #' The default value is a regular expression that matches any sequence of non-alphanumeric values. #' @param remove If TRUE, remove the input column from the output data frame. #' @param convert If TRUE, will run type.convert() with as.is = TRUE on new columns. #' This is useful if the component columns are integer, numeric or logical. #' #' NB: this will cause string "NA"s to be converted to NAs. #' @param ... 
Arguments passed on to methods #' @examples #' library(tidyr) #' # If you want to split by any non-alphanumeric value (the default): #' df <- lazy_dt(data.frame(x = c(NA, "x.y", "x.z", "y.z")), "DT") #' df %>% separate(x, c("A", "B")) #' #' # If you just want the second variable: #' df %>% separate(x, c(NA, "B")) #' #' # Use regular expressions to separate on multiple characters: #' df <- lazy_dt(data.frame(x = c(NA, "x?y", "x.z", "y:z")), "DT") #' df %>% separate(x, c("A","B"), sep = "([.?:])") #' #' # convert = TRUE detects column classes: #' df <- lazy_dt(data.frame(x = c("x:1", "x:2", "y:4", "z", NA)), "DT") #' df %>% separate(x, c("key","value"), ":") %>% str #' df %>% separate(x, c("key","value"), ":", convert = TRUE) %>% str # exported onLoad separate.dtplyr_step <- function(data, col, into, sep = "[^[:alnum:]]+", remove = TRUE, convert = FALSE, ...) { stopifnot(is.character(into)) stopifnot(is.character(sep)) col <- enexpr(col) into_length <- length(into) not_na_into <- !is.na(into) keep <- seq_along(into)[not_na_into] into <- into[not_na_into] t_str_split <- call2("tstrsplit", col, split = sep) if (length(keep) < into_length) { t_str_split$keep <- keep } if (isTRUE(convert)) { t_str_split$type.convert <- TRUE } out <- step_subset( data, vars = union(data$vars, into), j = call2(":=", into, t_str_split), needs_copy = if (data$implicit_copy) FALSE else TRUE ) if (remove && !as.character(col) %in% into) { out <- select(out, -!!col) } out } # exported onLoad separate.data.table <- function(data, col, into, sep = "[^[:alnum:]]+", remove = TRUE, convert = FALSE, ...) { data <- lazy_dt(data) tidyr::separate( data, col = {{ col }}, into = into, sep = sep, remove = remove, convert = convert, ... ) } dtplyr/R/step-subset-summarise.R0000644000176200001440000000717114126601265016406 0ustar liggesusers #' Summarise each group to one row #' #' This is a method for the dplyr [summarise()] generic. It is translated to #' the `j` argument of `[.data.table`. #' #' @param .data A [lazy_dt()]. #' @inheritParams dplyr::summarise #' @importFrom dplyr summarise #' @param .groups \Sexpr[results=rd]{lifecycle::badge("experimental")} Grouping structure of the result. #' #' * "drop_last": dropping the last level of grouping. This was the #' only supported option before version 1.0.0. #' * "drop": All levels of grouping are dropped. #' * "keep": Same grouping structure as `.data`. #' #' When `.groups` is not specified, it defaults to "drop_last". #' #' In addition, a message informs you of that choice, unless the result is ungrouped, #' the option "dplyr.summarise.inform" is set to `FALSE`, #' or when `summarise()` is called from a function in a package. #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(mtcars) #' #' dt %>% #' group_by(cyl) %>% #' summarise(vs = mean(vs)) #' #' dt %>% #' group_by(cyl) %>% #' summarise(across(disp:wt, mean)) summarise.dtplyr_step <- function(.data, ..., .groups = NULL) { dots <- capture_dots(.data, ...) 
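  # Unlike dplyr, a summarise() expression can't use a variable created
  # earlier in the same call; check_summarise_vars() errors with a hint
  # to add an extra mutate() step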
check_summarise_vars(dots) if (length(dots) == 0) { if (length(.data$groups) == 0) { out <- step_subset_j(.data, vars = character(), j = 0L) } else { # Acts like distinct on grouping vars out <- distinct(.data, !!!syms(.data$groups)) } } else { out <- step_subset_j( .data, vars = union(.data$groups, names(dots)), j = call2(".", !!!dots) ) } replaced_group_vars <- intersect(.data$groups, names(dots)) if (!is_empty(replaced_group_vars)) { out <- step_subset( out, groups = character(), j = expr(!!replaced_group_vars := NULL) ) } out_groups <- summarise_groups(.data, .groups, caller_env()) step_group(out, groups = out_groups) } #' @export summarise.data.table <- function(.data, ..., .groups = NULL) { .data <- lazy_dt(.data) summarise(.data, ..., .groups = .groups) } # For each expression, check if it uses any newly created variables check_summarise_vars <- function(dots) { for (i in seq_along(dots)) { used_vars <- all_names(get_expr(dots[[i]])) cur_vars <- names(dots)[seq_len(i - 1)] if (any(used_vars %in% cur_vars)) { abort(paste0( "`", names(dots)[[i]], "` ", "refers to a variable created earlier in this summarise().\n", "Do you need an extra mutate() step?" )) } } } summarise_groups <- function(.data, .groups, env_caller) { if (!is.null(.groups) && !.groups %in% c("drop_last", "drop", "keep")) { abort(c( paste0( "`.groups` can't be ", as_label(.groups), if (.groups == "rowwise") " in dtplyr" ), i = 'Possible values are NULL (default), "drop_last", "drop", and "keep"' )) } group_vars <- .data$groups n <- length(group_vars) verbose <- summarise_verbose(.groups, env_caller) if (verbose && n > 1) { new_groups <- glue::glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ") summarise_inform("has grouped output by {new_groups}") } .groups <- .groups %||% "drop_last" switch(.groups, drop_last = group_vars[-n], keep = group_vars, drop = character() ) } summarise_verbose <- function(.groups, .env) { is.null(.groups) && is_reference(topenv(.env), global_env()) && !identical(getOption("dplyr.summarise.inform"), FALSE) } summarise_inform <- function(..., .env = parent.frame()) { inform(paste0( "`summarise()` ", glue::glue(..., .envir = .env), '. You can override using the `.groups` argument.' )) } dtplyr/R/replace_na.R0000644000176200001440000000274314021443044014207 0ustar liggesusers#' Replace NAs with specified values #' #' @description #' This is a method for the tidyr `replace_na()` generic. It is translated to #' [data.table::fcoalesce()]. #' #' Note that unlike `tidyr::replace_na()`, `data.table::fcoalesce()` cannot #' replace `NULL` values in lists. #' #' @inheritParams tidyr::replace_na #' @param data A [lazy_dt()]. 
#' @examples #' library(tidyr) #' #' # Replace NAs in a data frame #' dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b"))) #' dt %>% replace_na(list(x = 0, y = "unknown")) #' #' # Replace NAs using `dplyr::mutate()` #' dt %>% dplyr::mutate(x = replace_na(x, 0)) # exported onLoad replace_na.dtplyr_step <- function(data, replace = list()) { stopifnot(is.list(replace)) if (length(replace) == 0) { return(data) } sim_data <- simulate_vars(data) replace_vars <- intersect(names(replace), names(sim_data)) replace_calls <- vector("list", length(replace_vars)) names(replace_calls) <- replace_vars for (i in seq_along(replace_vars)) { var <- replace_vars[[i]] check_replacement(replace[[i]], var) replace_calls[[i]] <- call2("fcoalesce", sym(var), replace[[i]]) } mutate(data, !!!replace_calls) } # exported onLoad replace_na.data.table <- function(data, replace = list()) { data <- lazy_dt(data) tidyr::replace_na(data, replace = replace) } check_replacement <- function(x, var) { n <- length(x) if (n == 1) { return() } abort(glue::glue("Replacement for `{var}` is length {n}, not length 1")) } dtplyr/R/fill.R0000644000176200001440000000535414015730574013057 0ustar liggesusers#' Fill in missing values with previous or next value #' #' @description #' This is a method for the tidyr `fill()` generic. It is translated to #' [data.table::nafill()]. Note that `data.table::nafill()` currently only #' works for integer and double columns. #' #' @inheritParams tidyr::fill #' @examples #' library(tidyr) #' #' # Value (year) is recorded only when it changes #' sales <- lazy_dt(tibble::tribble( #' ~quarter, ~year, ~sales, #' "Q1", 2000, 66013, #' "Q2", NA, 69182, #' "Q3", NA, 53175, #' "Q4", NA, 21001, #' "Q1", 2001, 46036, #' "Q2", NA, 58842, #' "Q3", NA, 44568, #' "Q4", NA, 50197, #' "Q1", 2002, 39113, #' "Q2", NA, 41668, #' "Q3", NA, 30144, #' "Q4", NA, 52897, #' "Q1", 2004, 32129, #' "Q2", NA, 67686, #' "Q3", NA, 31768, #' "Q4", NA, 49094 #' )) #' #' # `fill()` defaults to replacing missing data from top to bottom #' sales %>% fill(year) #' #' # Value (n_squirrels) is missing above and below within a group #' squirrels <- lazy_dt(tibble::tribble( #' ~group, ~name, ~role, ~n_squirrels, #' 1, "Sam", "Observer", NA, #' 1, "Mara", "Scorekeeper", 8, #' 1, "Jesse", "Observer", NA, #' 1, "Tom", "Observer", NA, #' 2, "Mike", "Observer", NA, #' 2, "Rachael", "Observer", NA, #' 2, "Sydekea", "Scorekeeper", 14, #' 2, "Gabriela", "Observer", NA, #' 3, "Derrick", "Observer", NA, #' 3, "Kara", "Scorekeeper", 9, #' 3, "Emily", "Observer", NA, #' 3, "Danielle", "Observer", NA #' )) #' #' # The values are inconsistently missing by position within the group #' # Use .direction = "downup" to fill missing values in both directions #' squirrels %>% #' dplyr::group_by(group) %>% #' fill(n_squirrels, .direction = "downup") %>% #' dplyr::ungroup() #' #' # Using `.direction = "updown"` accomplishes the same goal in this example # exported onLoad fill.dtplyr_step <- function(data, ..., .direction = c("down", "up", "downup", "updown")) { dots <- enquos(...) 
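  # nafill() only understands type = "locf" (fill down) and "nocb" (fill up),
  # so the two-pass directions are handled below with nested nafill() calls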
.direction <- arg_match(.direction) if (.direction %in% c("down", "up")) { type <- switch(.direction, "down" = "locf", "up" = "nocb") mutate(data, dplyr::across(c(!!!dots), nafill, type)) } else { if (.direction == "downup") { type1 <- "locf" type2 <- "nocb" } else { type1 <- "nocb" type2 <- "locf" } mutate(data, dplyr::across(c(!!!dots), ~ nafill(nafill(.x, type1), type2))) } } # exported onLoad fill.data.table <- function(data, ..., .direction = c("down", "up", "downup", "updown")) { data <- lazy_dt(data) tidyr::fill(data, ..., .direction = .direction) } dtplyr/R/step-modify.R0000644000176200001440000000401714006775461014370 0ustar liggesusersstep_modify <- function(parent, fun, args) { new_step( parent, groups = parent$groups, arrange = parent$arrange, implicit_copy = TRUE, fun = fun, args = args, class = "dtplyr_step_modify" ) } #' @export dt_call.dtplyr_step_modify <- function(x, needs_copy = x$needs_copy) { j <- call2(x$fun, quote(.SD), quote(.BY), !!!x$args) out <- call2("[", dt_call(x$parent, needs_copy), , j) add_grouping_param(out, x, arrange = FALSE) } # dplyr methods ----------------------------------------------------------- #' Apply a function to each group #' #' These are methods for the dplyr [group_map()] and [group_modify()] generics. #' They are both translated to `[.data.table`. #' #' @param .tbl A [lazy_dt()] #' @param .f The name of a two argument function. The first argument is passed #' `.SD`,the data.table representing the current group; the second argument #' is passed `.BY`, a list giving the current values of the grouping #' variables. The function should return a list or data.table. #' @param ... Additional arguments passed to `.f` #' @param keep Not supported for [lazy_dt]. #' @returns `group_map()` applies `.f` to each group, returning a list. #' `group_modify()` replaces each group with the results of `.f`, returning a #' modified [lazy_dt()]. #' @importFrom dplyr group_modify #' @export #' @examples #' library(dplyr) #' #' dt <- lazy_dt(mtcars) #' #' dt %>% #' group_by(cyl) %>% #' group_modify(head, n = 2L) #' #' dt %>% #' group_by(cyl) %>% #' group_map(head, n = 2L) group_modify.dtplyr_step <- function(.tbl, .f, ..., keep = FALSE) { if (!missing(keep)) { abort("`keep` is not supported for lazy data tables") } .f <- ensym(.f) args <- enquos(...) step_modify(.tbl, fun = .f, args = args) } #' @importFrom dplyr group_map #' @rdname group_modify.dtplyr_step #' @export group_map.dtplyr_step <- function(.tbl, .f, ..., keep = FALSE) { .f <- as_function(.f, caller_env()) dt <- as.data.table(.tbl) dt[, list(list(.f(.SD, .BY, ...))), by = eval(.tbl$groups)]$V1 } dtplyr/R/tidyeval-across.R0000644000176200001440000000702114126601265015230 0ustar liggesuserscapture_across <- function(data, x, j = TRUE) { x <- enquo(x) dt_squash_across(get_expr(x), get_env(x), data, j) } dt_squash_across <- function(call, env, data, j = j) { call <- match.call(dplyr::across, call, expand.dots = FALSE, envir = env) tbl <- simulate_vars(data, drop_groups = TRUE) .cols <- call$.cols %||% expr(everything()) locs <- tidyselect::eval_select(.cols, tbl) cols <- syms(names(tbl))[locs] funs <- across_funs(call$.fns, env, data, j = j) dots <- call$... 
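  # arguments supplied via `...` are squashed as well and spliced into every
  # generated function call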
dots <- lapply(dots, dt_squash, env = env, data = data, j = j) # Generate grid of expressions out <- vector("list", length(cols) * length(funs)) k <- 1 for (i in seq_along(cols)) { for (j in seq_along(funs)) { out[[k]] <- exec(funs[[j]], cols[[i]], !!!dots) k <- k + 1 } } .names <- eval(call$.names, env) names(out) <- across_names(names(locs), names(funs), .names, env) out } capture_if_all <- function(data, x, j = TRUE) { x <- enquo(x) dt_squash_if(get_expr(x), get_env(x), data, j) } dt_squash_if <- function(call, env, data, j = j, reduce = "&") { call <- match.call(dplyr::if_any, call, expand.dots = FALSE, envir = env) tbl <- simulate_vars(data, drop_groups = TRUE) .cols <- call$.cols %||% expr(everything()) locs <- tidyselect::eval_select(.cols, tbl, allow_rename = FALSE) cols <- syms(names(tbl))[locs] fun <- across_fun(call$.fns, env, data, j = j) out <- vector("list", length(cols)) for (i in seq_along(cols)) { out[[i]] <- exec(fun, cols[[i]], !!!call$...) } Reduce(function(x, y) call2(reduce, x, y), out) } across_funs <- function(funs, env, data, j = TRUE) { if (is.null(funs)) { list(function(x, ...) x) } else if (is_symbol(funs) || is_function(funs)) { set_names(list(across_fun(funs, env, data, j = j)), as_label(funs)) } else if (is.character(funs)) { names(funs)[names2(funs) == ""] <- funs lapply(funs, across_fun, env, data, j = j) } else if (is_call(funs, "~")) { set_names(list(across_fun(funs, env, data, j = j)), expr_name(f_rhs(funs))) } else if (is_call(funs, "list")) { args <- rlang::exprs_auto_name(funs[-1]) lapply(args, across_fun, env, data, j = j) } else if (!is.null(env)) { # Try evaluating once, just in case funs <- eval(funs, env) across_funs(funs, NULL) } else { abort("`.fns` argument to dtplyr::across() must be a NULL, a function, formula, or list") } } across_fun <- function(fun, env, data, j = TRUE) { if (is_symbol(fun) || is_string(fun) || is_call(fun, "function") || is_function(fun)) { function(x, ...) call2(fun, x, ...) } else if (is_call(fun, "~")) { call <- dt_squash_formula(fun, env, data, j = j, replace = quote(!!.x)) function(x, ...) 
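      # interpolate the current column into the squashed formula body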
expr_interp(call, child_env(emptyenv(), .x = x)) } else { abort(c( ".fns argument to dtplyr::across() must contain a function or a formula", x = paste0("Problem with ", expr_deparse(fun)) )) } } dt_squash_formula <- function(x, env, data, j = TRUE, replace = quote(!!.x)) { call <- f_rhs(x) call <- replace_dot(call, replace) if (is_call(call)) { call <- dt_squash_call(call, env, data, j = j) } call } across_names <- function(cols, funs, names = NULL, env = parent.frame()) { n_reps <- if (is_empty(funs)) 1 else length(funs) if (n_reps == 1) { names <- names %||% "{.col}" } else { names <- names %||% "{.col}_{.fn}" } glue_env <- child_env(env, .col = rep(cols, each = n_reps), .fn = rep(funs %||% seq_len(n_reps), length(cols)) ) glue::glue(names, .envir = glue_env) } dtplyr/R/step-subset.R0000644000176200001440000000631514126602141014374 0ustar liggesusersstep_subset <- function(parent, vars = parent$vars, groups = parent$groups, locals = parent$locals, arrange = parent$arrange, i = NULL, j = NULL, on = character(), allow_cartesian = NULL, needs_copy = FALSE ) { stopifnot(is_step(parent)) stopifnot(is_expression(i) || is_call(i) || is_step(i)) stopifnot(is_expression(j) || is_call(j)) stopifnot(is.character(on)) new_step( parent = parent, vars = vars, groups = groups, locals = locals, arrange = arrange, i = i, j = j, on = on, allow_cartesian = allow_cartesian, implicit_copy = !is.null(i) || !is.null(j), needs_copy = needs_copy || parent$needs_copy, class = "dtplyr_step_subset" ) } # Grouped i needs an intermediate assignment for maximum efficiency step_subset_i <- function(parent, i) { if (is_empty(i)) { return(parent) } if (length(parent$groups) > 0) { parent <- compute(parent) nm <- sym(parent$name) i <- expr((!!nm)[, .I[!!i]]) # dt[, .I[]] i <- add_grouping_param(i, parent, FALSE) # dt[, .I[], by = ()] i <- call("$", i, quote(V1)) # dt[, .I[], by = ()]$V1 } step_subset(parent, i = i) } # When adding a subset that contains only j, it may be possible to merge # the previous step. step_subset_j <- function(parent, vars = parent$vars, groups = parent$groups, arrange = parent$arrange, j = NULL) { if (can_merge_subset(parent)) { i <- parent$i on <- parent$on parent <- parent$parent } else { i <- NULL on <- character() } step_subset( parent, vars = vars, groups = groups, arrange = arrange, i = i, j = j, on = on ) } can_merge_subset <- function(x) { # Can only merge subsets if (!inherits(x, "dtplyr_step_subset")) { return(FALSE) } # Don't need to check that groups are identical because the only # dplyr functions that generate expression in i are # filter/slice/sample/arrange/join and don't affect groups is.null(x$j) } #' @export dt_sources.dtplyr_step_subset <- function(x) { # TODO: need to throw error if same name refers to different tables. 
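  # `i` can itself be a lazy step (not just an expression); its sources then
  # need to be merged with the parent's so dt_eval() can find every table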
if (is_step(x$i)) { utils::modifyList(dt_sources(x$parent), dt_sources(x$i)) } else { dt_sources(x$parent) } } #' @export dt_call.dtplyr_step_subset <- function(x, needs_copy = x$needs_copy) { if (is.null(x$i) && is.null(x$j)) { return(dt_call(x$parent)) } i <- if (is_step(x$i)) dt_call(x$i) else x$i parent <- dt_call(x$parent, needs_copy) if (is.null(i) && is.null(x$j)) { out <- parent } else if (is.null(i) && !is.null(x$j)) { out <- call2("[", parent, , x$j) } else if (!is.null(i) && is.null(x$j)) { out <- call2("[", parent, i) } else { out <- call2("[", parent, i, x$j) } if (!is.null(x$j)) { out <- add_grouping_param(out, x) } if (length(x$on) > 0) { out$on <- call2(".", !!!syms(x$on)) out$allow.cartesian <- x$allow_cartesian } out } dtplyr/R/step-subset-transmute.R0000644000176200001440000000301514126601265016414 0ustar liggesusers#' Create new columns, dropping old #' #' This is a method for the dplyr [transmute()] generic. It is translated to #' the `j` argument of `[.data.table`. #' #' @param .data A [lazy_dt()]. #' @inheritParams mutate.dtplyr_step #' @importFrom dplyr transmute #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' dt <- lazy_dt(dplyr::starwars) #' dt %>% transmute(name, sh = paste0(species, "/", homeworld)) transmute.dtplyr_step <- function(.data, ...) { dots <- capture_dots(.data, ...) nested <- nested_vars(.data, dots, .data$vars) groups <- group_vars(.data) if (!is_empty(groups)) { # TODO could check if there is actually anything mutated, e.g. to avoid # DT[, .(x = x)] is_group_var <- names(dots) %in% groups group_dots <- dots[is_group_var] .data <- mutate(ungroup(.data), !!!group_dots) .data <- group_by(.data, !!!syms(groups)) dots <- dots[!is_group_var] } if (is_empty(dots)) { # grouping variables have been removed from `dots` so `select()` would # produce a message "Adding grouping vars". # As `dplyr::transmute()` doesn't generate a message when adding group vars # we can also leave it away here return(select(.data, !!!group_vars(.data))) } if (!nested) { j <- call2(".", !!!dots) } else { j <- mutate_nested_vars(dots)$expr } vars <- union(group_vars(.data), names(dots)) step_subset_j(.data, vars = vars, j = j) } #' @export transmute.data.table <- function(.data, ...) { .data <- lazy_dt(.data) transmute(.data, ...) } dtplyr/R/step-group.R0000644000176200001440000000751314150757413014236 0ustar liggesusersstep_group <- function(parent, groups = parent$groups, arrange = parent$arrange) { if (can_step_group_return_early(parent, groups, arrange)) { return(parent) } new_step( parent, vars = parent$vars, groups = groups, class = "dtplyr_step_group", arrange = arrange, name = parent$name ) } #' @export dt_has_computation.dtplyr_step_group <- function(x) { dt_has_computation(x$parent) } add_grouping_param <- function(call, step, arrange = step$arrange) { if (length(step$groups) == 0) { return(call) } arrange <- arrange %||% TRUE using <- if (isTRUE(arrange)) "keyby" else "by" call[[using]] <- call2(".", !!!syms(step$groups)) call } # dplyr methods ----------------------------------------------------------- #' Group and ungroup #' #' These are methods for dplyr's [group_by()] and [ungroup()] generics. #' Grouping is translated to the either `keyby` and `by` argument of #' `[.data.table` depending on the value of the `arrange` argument. #' #' @inheritParams dplyr::group_by #' @param .data A [lazy_dt()] #' @param arrange If `TRUE`, will automatically arrange the output of #' subsequent grouped operations by group. 
If `FALSE`, output order will be #' left unchanged. In the generated data.table code this switches between #' using the `keyby` (`TRUE`) and `by` (`FALSE`) arguments. #' @param .add,add When `FALSE`, the default, `group_by()` will #' override existing groups. To add to the existing groups, use #' `.add = TRUE`. #' #' This argument was previously called `add`, but that prevented #' creating a new grouping variable called `add`, and conflicts with #' our naming conventions. #' @importFrom dplyr group_by #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' dt <- lazy_dt(mtcars) #' #' # group_by() is usually translated to `keyby` so that the groups #' # are ordered in the output #' dt %>% #' group_by(cyl) %>% #' summarise(mpg = mean(mpg)) #' #' # use `arrange = FALSE` to instead use `by` so the original order #' # or groups is preserved #' dt %>% #' group_by(cyl, arrange = FALSE) %>% #' summarise(mpg = mean(mpg)) group_by.dtplyr_step <- function(.data, ..., .add = FALSE, add = deprecated(), arrange = TRUE) { dots <- capture_dots(.data, ...) dots <- exprs_auto_name(dots) dots <- dots[!vapply(dots, is.null, logical(1))] if (lifecycle::is_present(add)) { lifecycle::deprecate_warn( "1.0.0", "dplyr::group_by(add = )", "group_by(.add = )" ) .add <- add } existing <- vapply( seq_along(dots), function(i) { x <- dots[[i]] name <- names(dots)[[i]] is_symbol(x) && (as_name(x) == name) }, logical(1) ) if (!all(existing)) { .data <- mutate(.data, !!!dots[!existing]) dots[!existing] <- syms(names(dots[!existing])) } groups <- c(if (.add) .data$groups, names(dots)) %||% character() arranged <- if (!is.null(.data$arrange)) .data$arrange && arrange else arrange step_group(.data, groups, arranged) } can_step_group_return_early <- function(parent, groups, arrange) { if (is_empty(groups)) { return(is_empty(parent$groups)) } same_arrange <- (is_false(arrange) || identical(arrange, parent$arrange)) same_groups <- identical(groups, parent$groups) same_arrange && same_groups } #' @export group_by.data.table <- function(.data, ...) { .data <- lazy_dt(.data) group_by(.data, ...) } #' @importFrom dplyr ungroup #' @export #' @rdname group_by.dtplyr_step ungroup.dtplyr_step <- function(.data, ...) { if (missing(...)) { step_group(.data, groups = character()) } else { old_groups <- group_vars(.data) to_remove <- tidyselect::vars_select(.data$vars, ...) new_groups <- setdiff(old_groups, to_remove) step_group(.data, groups = new_groups) } } #' @export ungroup.data.table <- function(.data, ...) { abort("Can't ungroup a data.table") } dtplyr/R/step-join.R0000644000176200001440000002241314127361773014041 0ustar liggesusersstep_join <- function(x, y, on, style, suffix = c(".x", ".y")) { stopifnot(is_step(x)) stopifnot(is_step(y)) stopifnot(is.null(on) || is.character(on)) style <- match.arg(style, c("inner", "full", "right", "left", "semi", "anti")) if (is_character(on, 0)) { return(cross_join(x, y)) } on <- dplyr::common_by(on, x, y) vars_out_dt <- dt_join_vars(x$vars, y$vars, on$x, on$y, suffix = suffix, style = style) colorder <- dt_join_colorder(x$vars, y$vars, on$x, on$y, style) # TODO suppress warning in merge # "column names ... 
are duplicated in the result out <- new_step( parent = if (style == "left") y else x, implicit_copy = TRUE, parent2 = if (style == "left") x else y, vars = vars_out_dt, on = if (style %in% c("left", "full")) on else list(x = on$y, y = on$x), style = style, locals = utils::modifyList(x$locals, y$locals), class = "dtplyr_step_join" ) if (style %in% c("anti", "semi")) { return(out) } out <- step_colorder(out, colorder) x_sim <- simulate_vars(x) y_sim <- simulate_vars(y) vars <- dplyr_join_vars(x_sim, y_sim, on$x, on$y, suffix = suffix) if (any(duplicated(vars_out_dt))) { step_setnames(out, colorder, vars, in_place = FALSE) } else { step_setnames(out, vars_out_dt[colorder], vars, in_place = FALSE) } } cross_join <- function(x, y) { xy <- left_join( mutate(x, .cross_join_col = 1), mutate(y, .cross_join_col = 1), by = ".cross_join_col" ) # use custom select to produce way shorter query step_subset_j( xy, vars = setdiff(xy$vars, ".cross_join_col"), j = expr(!".cross_join_col") ) } #' @export dt_sources.dtplyr_step_join <- function(x) { # TODO: need to throw error if same name refers to different tables. utils::modifyList(dt_sources(x$parent), dt_sources(x$parent2)) } #' @export dt_call.dtplyr_step_join <- function(x, needs_copy = x$needs_copy) { lhs <- dt_call(x$parent, needs_copy) rhs <- dt_call(x$parent2) on2 <- simplify_names(stats::setNames(x$on$x, x$on$y)) on <- call2(".", !!!syms(on2)) switch(x$style, full = call2("merge", lhs, rhs, all = TRUE, by.x = x$on$x, by.y = x$on$y, allow.cartesian = TRUE), left = call2("[", lhs, rhs, on = on, allow.cartesian = TRUE), inner = call2("[", lhs, rhs, on = on, nomatch = NULL, allow.cartesian = TRUE), right = call2("[", lhs, rhs, on = on, allow.cartesian = TRUE), anti = call2("[", lhs, call2("!", rhs), on = on), semi = call2("[", lhs, call2("unique", call2("[", lhs, rhs, which = TRUE, nomatch = NULL, on = on))) ) } # dplyr verbs ------------------------------------------------------------- #' Join data tables #' #' These are methods for the dplyr generics [left_join()], [right_join()], #' [inner_join()], [full_join()], [anti_join()], and [semi_join()]. Left, right, #' inner, and anti join are translated to the `[.data.table` equivalent, #' full joins to [data.table::merge.data.table()]. #' Left, right, and full joins are in some cases followed by calls to #' [data.table::setcolorder()] and [data.table::setnames()] to ensure that column #' order and names match dplyr conventions. #' Semi-joins don't have a direct data.table equivalent. #' #' @param x,y A pair of [lazy_dt()]s. #' @inheritParams dplyr::left_join #' @importFrom dplyr left_join #' @export #' @examples #' library(dplyr, warn.conflicts = FALSE) #' #' band_dt <- lazy_dt(dplyr::band_members) #' instrument_dt <- lazy_dt(dplyr::band_instruments) #' #' band_dt %>% left_join(instrument_dt) #' band_dt %>% right_join(instrument_dt) #' band_dt %>% inner_join(instrument_dt) #' band_dt %>% full_join(instrument_dt) #' #' band_dt %>% semi_join(instrument_dt) #' band_dt %>% anti_join(instrument_dt) left_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y")) { y <- dtplyr_auto_copy(x, y, copy = copy) step_join(x, y, by, style = "left", suffix = suffix) } #' @export left_join.data.table <- function(x, y, ...) { x <- lazy_dt(x) left_join(x, y, ...) 
} #' @importFrom dplyr right_join #' @export right_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y")) { y <- dtplyr_auto_copy(x, y, copy = copy) step_join(x, y, by, style = "right", suffix = suffix) } #' @export right_join.data.table <- function(x, y, ...) { x <- lazy_dt(x) right_join(x, y, ...) } #' @importFrom dplyr inner_join #' @export inner_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y")) { y <- dtplyr_auto_copy(x, y, copy = copy) step_join(x, y, on = by, style = "inner", suffix = suffix) } #' @export inner_join.data.table <- function(x, y, ...) { x <- lazy_dt(x) inner_join(x, y, ...) } #' @importFrom dplyr full_join #' @export full_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y")) { y <- dtplyr_auto_copy(x, y, copy = copy) step_join(x, y, on = by, style = "full", suffix = suffix) } #' @export full_join.data.table <- function(x, y, ...) { x <- lazy_dt(x) full_join(x, y, ...) } #' @importFrom dplyr anti_join #' @export anti_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE) { y <- dtplyr_auto_copy(x, y, copy = copy) step_join(x, y, on = by, style = "anti") } #' @export anti_join.data.table <- function(x, y, ...) { x <- lazy_dt(x) anti_join(x, y, ...) } #' @importFrom dplyr semi_join #' @export semi_join.dtplyr_step <- function(x, y, ..., by = NULL, copy = FALSE) { step_join(x, y, on = by, style = "semi") } #' @export semi_join.data.table <- function(x, y, ...) { x <- lazy_dt(x) semi_join(x, y, ...) } # helpers ----------------------------------------------------------------- dtplyr_auto_copy <- function(x, y, copy = copy) { if (is_step(y)) { y } else if (is.data.frame(y)) { # includes data tables lazy_dt(y) } else { dplyr::auto_copy(x, y, copy = copy) } } add_suffixes <- function (x, y, suffix) { x[x %in% y] <- paste0(x[x %in% y], suffix) x } dplyr_join_vars <- function(x, y, on_x, on_y, suffix) { colnames(left_join(x, y, by = stats::setNames(on_y, on_x), suffix = suffix)) } dt_join_vars <- function(x, y, on_x, on_y, suffix, style) { style <- match.arg(style, c("inner", "full", "right", "left", "semi", "anti")) if (style == "left") { # need to swap `x` and `y` as the data.table left join is `y[x, on]` subset_join_vars(y, x, on_y = on_x) } else if (style %in% c("right", "inner")) { subset_join_vars(x, y, on_y) } else if (style == "full") { merge_vars(x, y, on_x, on_y, suffix) } else { x } } # column names as generated in `x[y, on = on]` subset_join_vars <- function(x, y, on_y) { # `y` variables used for joining are not included again y_out <- setdiff(y, on_y) # remaining `y` columns that are also in `x` get _prefixed_ by "i." 
y_out[y_out %in% x] <- paste0("i.", y_out[y_out %in% x]) out_names <- c(x, y_out) add_dt_suffix(out_names) } add_dt_suffix <- function(x) { for (i in seq_along(x)) { j <- 1 nm <- x[[i]] first_occurrence <- !nm %in% x[seq(0, i - 1)] if (!first_occurrence) { while (nm %in% x[-i]) { nm <- paste0(x[[i]], ".", j) j <- j + 1 } } x[[i]] <- nm } x } # column names as generated by `merge(x, y, by.x = on_x, by.y = on_y)` merge_vars <- function(x, y, on_x, on_y, suffix = c(".x", ".y")) { x <- setdiff(x, on_x) y <- setdiff(y, on_y) x_out <- add_suffixes(x, y, suffix[[1]]) y_out <- add_suffixes(y, x, suffix[[2]]) c(on_x, x_out, y_out) } dt_join_colorder <- function(x, y, on_x, on_y, style) { style <- match.arg(style, c("inner", "full", "right", "left", "semi", "anti")) if (style == "left") { subset_left_join_colorder(x, y, on_x, on_y) } else if (style == "full") { merge_join_colorder(x, y, on_x, on_y) } else { seq(length(x) + length(y) - length(on_x)) } } #' column order of data.table left join `y[x]` compared to `left_join(y, x)` #' @noRd subset_left_join_colorder <- function(x, y, on_x, on_y) { # variable order # y[x, on]: y-vars, x-vars - on_x # left_join(x, y, on): x-vars, y-vars - on_y x_loc <- rep_along(x, NA_integer_) # locations of x-vars not used in `on_x` used_in_on_x <- x %in% on_x x_loc[!used_in_on_x] <- seq_along(x[!used_in_on_x]) + length(y) # locations of x-vars used in `on_x` # They have a matching column in `y`. Map `x-vars` according to `on_x` and `on_y` x <- dplyr::recode(x, !!!set_names(on_y, on_x)) x_loc[used_in_on_x] <- vctrs::vec_match(x[used_in_on_x], y) y_out_dt <- setdiff(y, on_y) y_loc <- vctrs::vec_match(y_out_dt, y) c(x_loc, y_loc) } merge_join_colorder <- function(x, y, on_x, on_y) { # variable order # merge(x, y, on_x, on_y): on_x, x-vars - on_x, y-vars - on_y # full_join(x, y, on): x-vars, y-vars - on_y x_out_dt <- setdiff(x, on_x) x_loc <- vctrs::vec_match(x, x_out_dt) + length(on_x) x_loc[is.na(x_loc)] <- vctrs::vec_match(x[is.na(x_loc)], on_x) n_x <- length(x) n_y_out <- length(y) - length(on_x) c(x_loc, n_x + seq2(1, n_y_out)) } #' @importFrom dplyr same_src #' @export same_src.dtplyr_step <- function(x, y) { is_step(y) } #' @importFrom dplyr auto_copy #' @export auto_copy.dtplyr_step <- function(x, y, copy = FALSE, ...) { lazy_dt(as.data.frame(y)) } # Needed to test auto_copy #' @export tbl_vars.foo <- function(x) "x" #' @export as.data.frame.foo <- function(x, ...) data.frame(x = 1:10) dtplyr/R/tidyeval.R0000644000176200001440000001665414127150472013754 0ustar liggesusersdt_eval <- function(x) { env <- as_environment(dt_sources(x), x$env) add_dt_wrappers(env) for (var in names(x$locals)) { env[[var]] <- eval(x$locals[[var]], env) } quo <- new_quosure(dt_call(x), env) eval_tidy(quo) } # Make sure data.table functions are available so dtplyr still works # even when data.table isn't attached dt_funs <- c( "between", "CJ", "copy", "data.table", "dcast", "melt", "nafill", "fcase", "fcoalesce", "fifelse", "fintersect", "frank", "frankv", "fsetdiff", "funion", "setcolorder", "setnames", "shift", "tstrsplit", "uniqueN" ) add_dt_wrappers <- function(env) { env_bind(env, !!!env_get_list(ns_env("data.table"), dt_funs)) } globalVariables(dt_funs) # These functions attempt to simulate tidy eval as much as possible within # data.table. The goal is to get the majority of real-world code to work, # without aiming for 100% compliance. 
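#
# As a rough illustration (the exact generated table names and spacing may
# differ), a pipeline like
#
#   lazy_dt(mtcars) %>% mutate(z = mpg - mean(mpg)) %>% show_query()
#
# is squashed into a data.table call along the lines of
#
#   copy(`_DT1`)[, `:=`(z = mpg - mean(mpg))]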
capture_dots <- function(.data, ..., .j = TRUE) { dots <- enquos(..., .named = .j) dots <- lapply(dots, dt_squash, data = .data, j = .j) # Remove names from any list elements is_list <- vapply(dots, is.list, logical(1)) names(dots)[is_list] <- "" # Auto-splice list results from dt_squash() dots[!is_list] <- lapply(dots[!is_list], list) unlist(dots, recursive = FALSE) } capture_dot <- function(.data, x, j = TRUE) { dt_squash(enquo(x), data = .data, j = j) } # squash quosures dt_squash <- function(x, env, data, j = TRUE) { if (is_atomic(x) || is_null(x)) { x } else if (is_symbol(x)) { if (identical(x, quote(.))) { quote(.SD) } else { var <- as.character(x) if (var %in% c("T", "F")) { as.logical(var) } else if (nchar(x) > 0 && substr(var, 1, 1) == ".") { # data table pronouns are bound to NULL x } else if (!var %in% data$vars && env_has(env, var, inherit = TRUE)) { if (is_global(env)) { # Slightly dangerous because the variable might be modified # between creation and execution, but it seems like a reasonable # tradeoff in order to get a more natural translation. if (j) { # use .. to avoid data mask sym(paste0("..", var)) } else { # i doesn't provide a data mask x } } else { eval(x, env) } } else { x } } } else if (is_quosure(x)) { dt_squash(get_expr(x), get_env(x), data, j = j) } else if (is_call(x, "if_any")) { dt_squash_if(x, env, data, j = j, reduce = "|") } else if (is_call(x, "if_all")) { dt_squash_if(x, env, data, j = j, reduce = "&") } else if (is_call(x, "across")) { dt_squash_across(x, env, data, j = j) } else if (is_call(x)) { dt_squash_call(x, env, data, j = j) } else { abort("Invalid input") } } dt_squash_call <- function(x, env, data, j = TRUE) { if (is_mask_pronoun(x)) { var <- x[[3]] if (is_call(x, "[[")) { var <- sym(eval(var, env)) } if (is_symbol(x[[2]], ".data")) { var } else if (is_symbol(x[[2]], ".env")) { sym(paste0("..", var)) } } else if (is_call(x, c("coalesce", "replace_na"))) { args <- lapply(x[-1], dt_squash, env = env, data = data, j = j) call2("fcoalesce", !!!args) } else if (is_call(x, "case_when")) { # case_when(x ~ y) -> fcase(x, y) args <- unlist(lapply(x[-1], function(x) { list( # Get as "default" case as close as possible # https://github.com/Rdatatable/data.table/issues/4258 if (isTRUE(x[[2]]) || is_symbol(x[[2]], "T")) quote(rep(TRUE, .N)) else x[[2]], x[[3]] ) })) args <- lapply(args, dt_squash, env = env, data = data, j = j) call2("fcase", !!!args) } else if (is_call(x, "cur_data")) { quote(.SD) } else if (is_call(x, "cur_data_all")) { abort("`cur_data_all()` is not available in dtplyr") } else if (is_call(x, "cur_group")) { quote(.BY) } else if (is_call(x, "cur_group_id")) { quote(.GRP) } else if (is_call(x, "cur_group_rows")) { quote(.I) } else if (is_call(x, "desc")) { if (!has_length(x, 2L)) { abort("`desc()` expects exactly one argument.") } x[[1]] <- sym("-") x[[2]] <- get_expr(x[[2]]) x } else if (is_call(x, c("if_else", "ifelse"))) { if (is_call(x, "if_else")) { x <- unname(match.call(dplyr::if_else, x)) } else { x <- unname(match.call(ifelse, x)) } x[[1]] <- quote(fifelse) x[-1] <- lapply(x[-1], dt_squash, env, data, j = j) x } else if (is_call(x, c("lag", "lead"))) { if (is_call(x, "lag")) { type <- "lag" call <- match.call(dplyr::lag, x) } else { type <- "lead" call <- match.call(dplyr::lead, x) } call[-1] <- lapply(call[-1], dt_squash, env = env, data = data, j = j) shift_call <- call2("shift", x[[2]]) if (!is_null(call$n)) { shift_call$n <- call$n } if (!is_null(call$default)) { shift_call$fill <- call$default } if 
(!is_null(call$order_by)) { abort( glue::glue("The `order_by` argument of `{type}()` is not supported by dtplyr") ) } shift_call$type <- type shift_call } else if (is_call(x, "n", n = 0)) { quote(.N) } else if (is_call(x, "n_distinct")) { x <- match.call(dplyr::n_distinct, x, expand.dots = FALSE) dots <- x$... if (length(dots) == 1) { vec <- dots[[1]] } else { vec <- call2("data.table", !!!dots) } call <- call2("uniqueN", vec) if (!is_null(x$na.rm)) { call$na.rm <- x$na.rm } call } else if (is_call(x, "row_number", n = 0)) { quote(seq_len(.N)) } else if (is_call(x, "row_number", n = 1)) { arg <- dt_squash(x[[2]], env, data, j = j) expr(frank(!!arg, ties.method = "first", na.last = "keep")) } else if (is.function(x[[1]]) || is_call(x, "function")) { simplify_function_call(x, env, data, j = j) } else { x[-1] <- lapply(x[-1], dt_squash, env, data, j = j) x } } is_mask_pronoun <- function(x) { is_call(x, c("$", "[["), n = 2) && is_symbol(x[[2]], c(".data", ".env")) } is_global <- function(env) { if (identical(env, globalenv())) { return(TRUE) } # Heuristic for inside pipe if (identical(env_names(env), ".") && identical(env_parent(env), globalenv())) { return(TRUE) } FALSE } simplify_function_call <- function(x, env, data, j = TRUE) { if (inherits(x[[1]], "inline_colwise_function")) { dot_var <- data$vars[[attr(x, "position")]] out <- replace_dot(attr(x[[1]], "formula")[[2]], sym(dot_var)) dt_squash(out, env, data, j = j) } else { name <- fun_name(x[[1]]) if (is.null(name)) { return(x) } attr(x, "position") <- NULL x[[1]] <- name dt_squash(x, env, data, j = j) } } replace_dot <- function(call, sym) { if (is_symbol(call, ".") || is_symbol(call, ".x")) { sym } else if (is_call(call)) { call[] <- lapply(call, replace_dot, sym) call } else { call } } has_gforce <- c( "min", "max", "mean", "median", "var", "sd", "sum", "prod", "first", "last", "head", "tail" ) dplyr_trans <- c("n", "row_number") fun_name <- function(fun) { pkg_env <- baseenv() for (x in has_gforce) { if (!env_has(pkg_env, x, inherit = TRUE)) next fun_x <- env_get(pkg_env, x, inherit = TRUE) if (identical(fun, fun_x)) return(sym(x)) } dplyr_env <- pkg_env("dplyr") for (x in dplyr_trans) { fun_x <- env_get(dplyr_env, x, inherit = TRUE) if (identical(fun, fun_x)) return(sym(x)) } NULL } dtplyr/NEWS.md0000644000176200001440000002245614172104656012705 0ustar liggesusers# dtplyr 1.2.1 * Fix for upcoming rlang release. # dtplyr 1.2.0 ## New authors @markfairbanks, @mgirlich, and @eutwt are now dtplyr authors in recognition of their significant and sustained contributions. Along with @eutwt, they supplied the bulk of the improvements in this release! ## New features * dtplyr gains translations for many more tidyr verbs: * `drop_na()` (@markfairbanks, #194) * `complete()` (@markfairbanks, #225) * `expand()` (@markfairbanks, #225) * `fill()` (@markfairbanks, #197) * `pivot_longer()` (@markfairbanks, #204) * `replace_na()` (@markfairbanks, #202) * `nest()` (@mgirlich, #251) * `separate()` (@markfairbanks, #269) * `tally()` gains a translation (@mgirlich, #201). * `ifelse()` is mapped to `fifelse()` (@markfairbanks, #220). ## Minor improvements and bug fixes * `slice()` helpers (`slice_head()`, `slice_tail()`, `slice_min()`, `slice_max()` and `slice_sample()`) now accept negative values for `n` and `prop`. * `across()` defaults to `everything()` when `.cols` isn't provided (@markfairbanks, #231), and handles named selections (@eutwt #293). 
It ˜ow handles `.fns` arguments in more forms (@eutwt #288): * Anonymous functions, such as `function(x) x + 1` * Formulas which don't require a function call, such as `~ 1` * `arrange(dt, desc(col))` is translated to `dt[order(-col)]` in order to take advantage of data.table's fast order (@markfairbanks, #227). * `count()` applied to data.tables no longer breaks when dtplyr is loaded (@mgirlich, #201). * `case_when()` supports use of `T` to specify the default (#272). * `filter()` errors for named input, e.g. `filter(dt, x = 1)` (@mgirlich, #267) and works for negated logical columns (@mgirlich, @211). * `group_by()` ungroups when no grouping variables are specified (@mgirlich, #248), and supports inline mutation like `group_by(dt, y = x)` (@mgirlich, #246). * `if_else()` named arguments are translated to the correct arguments in `data.table::fifelse()` (@markfairbanks, #234). `if_else()` supports `.data` and `.env` pronouns (@markfairbanks, #220). * `if_any()` and `if_all()` default to `everything()` when `.cols` isn't provided (@eutwt, #294). * `intersect()`/`union()`/`union_all()`/`setdiff()` convert data.table inputs to `lazy_dt()` (#278). * `lag()`/`lead()` are translated to `shift()`. * `lazy_dt()` keeps groups (@mgirlich, #206). * `left_join()` produces the same column order as dplyr (@markfairbanks, #139). * `left_join()`, `right_join()`, `full_join()`, and `inner_join()` perform a cross join for `by = character()` (@mgirlich, #242). * `left_join()`, `right_join()`, and `inner_join()` are always translated to the `[.data.table` equivalent. For simple merges the translation gets a bit longer but thanks to the simpler code base it helps to better handle names in `by` and duplicated variables names produced in the data.table join (@mgirlich, #222). * `mutate()` and `transmute()` work when called without variables (@mgirlich, #248). * `mutate()` gains new experimental arguments `.before` and `.after` that allow you to control where the new columns are placed (to match dplyr 1.0.0) (@eutwt #291). * `mutate()` can modify grouping columns (instead of creating another column with the same name) (@mgirlich, #246). * `n_distinct()` is translated to `uniqueN()`. * `tally()` and `count()` follow the dplyr convention of creating a unique name if the default output `name` (n) already exists (@eutwt, #295). * `pivot_wider()` names the columns correctly when `names_from` is a numeric column (@mgirlich, #214). * `pull()` supports the `name` argument (@mgirlich, #263). * `slice()` no longer returns excess rows (#10). * `slice_*()` functions after `group_by()` are faster (@mgirlich, #216). * `slice_max()` works when ordering by a character column (@mgirlich, #218). * `summarise()` supports the `.groups` argument (@mgirlich, #245). * `summarise()`, `tally()`, and `count()` can change the value of a grouping variables (@eutwt, #295). * `transmute()` doesn't produce duplicate columns when assigning to the same variable (@mgirlich, #249). It correctly flags grouping variables so they selected (@mgirlich, #246). * `ungroup()` removes variables in `...` from grouping (@mgirlich, #253). # dtplyr 1.1.0 ## New features * All verbs now have (very basic) documentation pointing back to the dplyr generic, and providing a (very rough) description of the translation accompanied with a few examples. * Passing a data.table to a dplyr generic now converts it to a `lazy_dt()`, making it a little easier to move between data.table and dplyr syntax. * dtplyr has been bought up to compatibility with dplyr 1.0.0. 
This includes new translations for: * `across()`, `if_any()`, `if_all()` (#154). * `count()` (#159). * `relocate()` (@smingerson, #162). * `rename_with()` (#160) * `slice_min()`, `slice_max()`, `slice_head()`, `slice_tail()`, and `slice_sample()` (#174). And `rename()` and `select()` now support dplyr 1.0.0 tidyselect syntax (apart from predicate functions which can't easily work on lazily evaluated data tables). * We have begun the process of adding translations for tidyr verbs beginning with `pivot_wider()` (@markfairbanks, #189). ## Translation improvements * `compute()` now creates an intermediate assignment within the translation. This will generally have little impact on performance but it allows you to use intermediate variables to simplify complex translations. * `case_when()` is now translated to `fcase()` (#190). * `cur_data()` (`.SD`), `cur_group()` (`.BY`), `cur_group_id()` (`.GRP`), and `cur_group_rows() (`.I`) are now tranlsated to their data.table equivalents (#166). * `filter()` on grouped data nows use a much faster translation using on `.I` rather than `.SD` (and requiring an intermediate assignment) (#176). Thanks to suggestion from @myoung3 and @ColeMiller1. * Translation of individual expressions: * `x[[1]]` is now translated correctly. * Anonymous functions are now preserved (@smingerson, #155) * Environment variables used in the `i` argument of `[.data.table` are now correctly inlined when not in the global environment (#164). * `T` and `F` are correctly translated to `TRUE` and `FALSE` (#140). ## Minor improvements and bug fixes * Grouped filter, mutate, and slice no longer affect ordering of output (#178). * `as_tibble()` gains a `.name_repair` argument (@markfairbanks). * `as.data.table()` always calls `[]` so that the result will print (#146). * `print.lazy_dt()` shows total rows, and grouping, if present. * `group_map()` and `group_walk()` are now translated (#108). # dtplyr 1.0.1 * Better handling for `.data` and `.env` pronouns (#138). * dplyr verbs now work with `NULL` inputs (#129). * joins do better job at determining output variables in the presence of duplicated outputs (#128). When joining based on different variables in `x` and `y`, joins consistently preserve column from `x`, not `y` (#137). * `lazy_dt()` objects now have a useful `glimpse()` method (#132). * `group_by()` now has an `arrange` parameter which, if set to `FALSE`, sets the data.table translation to use `by` rather than `keyby` (#85). * `rename()` now works without `data.table` attached, as intended (@michaelchirico, #123). * dtplyr has been re-licensed as MIT (#165). # dtplyr 1.0.0 * Converted from eager approach to lazy approach. You now must use `lazy_dt()` to begin a translation pipeline, and must use `collect()`, `as.data.table()`, `as.data.frame()`, or `as_tibble()` to finish the translation and actually perform the computation (#38). This represents a complete overhaul of the package replacing the eager evaluation used in the previous releases. This unfortunately breaks all existing code that used dtplyr, but frankly the previous version was extremely inefficient so offered little of data.table's impressive speed, and was used by very few people. * dtplyr provides methods for data.tables that warning you that they use the data frame implementation and you should use `lazy_dt()` (#77) * Joins now pass `...` on to data.table's merge method (#41). * `ungroup()` now copies its input (@christophsax, #54). * `mutate()` preserves grouping (@christophsax, #17). 
* `if_else()` and `coalesce()` are mapped to data.table's `fifelse()` and `fcoalesce()` respectively (@michaelchirico, #112). # dtplyr 0.0.3 - Maintenance release for CRAN checks. - `inner_join()`, `left_join()`, `right_join()`, and `full_join()`: new `suffix` argument which allows you to control what suffix duplicated variable names receive, as introduced in dplyr 0.5 (#40, @christophsax). - Joins use extended `merge.data.table()` and the `on` argument, introduced in data.table 1.9.6. Avoids copy and allows joins by different keys (#20, #21, @christophsax). # dtplyr 0.0.2 - This is a compatibility release. It makes dtplyr compatible with dplyr 0.6.0 in addition to dplyr 0.5.0. # dtplyr 0.0.1 - `distinct()` gains `.keep_all` argument (#30, #31). - Slightly improve test coverage (#6). - Install `devtools` from GitHub on Travis (#32). - Joins return `data.table`. Right and full join are now implemented (#16, #19). - Remove warnings from tests (#4). - Extracted from `dplyr` at revision e5f2952923028803. dtplyr/MD50000644000176200001440000001625514172110611012103 0ustar liggesusers3c8560f57b7d830466d3a54a9f89b318 *DESCRIPTION 27a5563b3b2e3884613ea75a3305bb2a *LICENSE b65a584b2962898d413b392472e7d475 *NAMESPACE f7d76c7fb78b8d074ce4142f0eed6348 *NEWS.md 73cdc949ae8fb6a1f08f058c02b32e79 *R/complete.R e29f2ab36659ae0048dbac41aa300691 *R/count.R ff89fbf914f90f215e1fefa68a25e76c *R/dtplyr-package.R 0f986806ebc0fca2e2a6a2ccd5c488d7 *R/fill.R c0d875a2af58d2f2a759264934b15ea2 *R/replace_na.R 4edd1654335ff1256a67a002fe1466bc *R/step-assign.R b72ad22b9970de0090e5f6b19cafdbf7 *R/step-call-pivot_longer.R f379029df69c7ba5c6d67e0d1754aba6 *R/step-call-pivot_wider.R c9681377d97ae39b4ff681bb0c948010 *R/step-call.R 005d4db3e33e8606ddc1899ba021d40c *R/step-colorder-relocate.R 500f7bb6d246bfe863a5e420c8e906f0 *R/step-colorder.R 4d6a6227b737c07c30f7f6b87af7df0b *R/step-first.R b59448c716ad19e44c44a9bbd0d307ad *R/step-group.R 94d3e0d83e1e8407539b703f3c9f03b2 *R/step-join.R 5d538f9765c95809e5dccda2915ff319 *R/step-modify.R e035fd614b63e305c732f39b0124cf03 *R/step-mutate.R 94c7d574c4c6ce0c77b7d7f169b57d3c *R/step-nest.R da3205a33a2ad29d4747672d99ed18cb *R/step-set.R edad1c1ed46d0f7f90dde400da56a4fb *R/step-setnames.R f4e009c202eb476fc9c17f156f1c909a *R/step-subset-arrange.R 381426b63e51399126122454ae99cf65 *R/step-subset-do.R 4b2bee17e2653aff13ec7310c0258b42 *R/step-subset-expand.R 361507dc5ca600c3f5ef25c71c3c0fad *R/step-subset-filter.R ba32d9e05d2afbe7341b36ccb3000a17 *R/step-subset-select.R 697ec721b1400046fb6e4b3f98a33dbd *R/step-subset-separate.R c44a9a04e847978b77091f8c100ff9ef *R/step-subset-slice.R 4a6b87b0c486686beae0a00b3b835b9d *R/step-subset-summarise.R 6baa3038ba7840caca17bf2b4c7a1374 *R/step-subset-transmute.R 0cf9c7a77fe09c1b53aa8ec7ff20a02f *R/step-subset.R 043139693273fad970dfd06c9cf39c3f *R/step.R 97dd96e654dbda7ee612fd441119b4da *R/tidyeval-across.R 6443384ade7722d1592f8057064cb814 *R/tidyeval.R 0dd6f8a296acc01969a2e5a9a4c70ce9 *R/utils.R 6026a58e8292f7077a1f3c3c784aacf7 *R/zzz.R c7482dee28560c867aac1c9535c84b5c *README.md cff9db3d7b031832f1a4c016fd54d5ee *build/dtplyr.pdf a28ef5472abc437165c3e56417cda474 *build/vignette.rds 0e388eac7156e07062f46f93dbee85cb *inst/doc/translation.R 1af93888fa1b8a19e2e82b13265f6f26 *inst/doc/translation.Rmd 24714845b1a9ca552f6b86b30ac9f5e6 *inst/doc/translation.html f717094e0c8d521ab067adb23ecded4d *man/arrange.dtplyr_step.Rd 0b3cd4884c115e1db0b07ec477b97863 *man/collect.dtplyr_step.Rd dec52bbce5d280465a59ab34062f6205 *man/complete.dtplyr_step.Rd 
ada04180fe1a498f4281a2ce91b85d98 *man/count.dtplyr_step.Rd 8c5bd61bd9f48d490427a4c90d3057cf *man/distinct.dtplyr_step.Rd d45bc2d4c21e8435ddfcb32400b11cbe *man/drop_na.dtplyr_step.Rd 550521638fba10cdae02bb2210e69a6f *man/dtplyr-package.Rd 30827e69472f1333214d67da4eacc302 *man/expand.dtplyr_step.Rd 6dfa31db423be20f202995c4a727a55f *man/figures/logo.png d582de02914acd306dd6bfa1d5300bc0 *man/fill.dtplyr_step.Rd fb1457895ac11f3aef85b5f27854d58d *man/filter.dtplyr_step.Rd 64596e226ec1cff0e8d70291ad2989e5 *man/group_by.dtplyr_step.Rd 4c998a750037f4e0534ebebd11eee30b *man/group_modify.dtplyr_step.Rd c988f1b9dc3d12ee600530c2c928b989 *man/head.dtplyr_step.Rd cec46c1c485bce3a920ee51058065ae3 *man/intersect.dtplyr_step.Rd 2794d05f881b5b40b8f3649a4d1ce3e1 *man/lazy_dt.Rd 8551a43826018f716dff0b4e63caa29a *man/left_join.dtplyr_step.Rd 351727e78c6b9208adeb0147ef9b0a4c *man/mutate.dtplyr_step.Rd 77428c3f5f498f000f02cbed4d3fb1e7 *man/nest.dtplyr_step.Rd 0da3d5db6d20f9a220e7e5b4a0237706 *man/pivot_longer.dtplyr_step.Rd d1e0e43ac021889afb8b5fec7ebcddb9 *man/pivot_wider.dtplyr_step.Rd 9895c2bd4ba57bbd83ad8cffc35a1ddc *man/relocate.dtplyr_step.Rd e734ce49d34bb506087b31420fa575cf *man/rename.dtplyr_step.Rd 3aa13837b02ea29ef7804ffb3058ec3f *man/replace_na.dtplyr_step.Rd 0d851ef75e704f465836c5b024d932f8 *man/select.dtplyr_step.Rd 3f6bb8dd80dfa1a4bc78b6a75bfdb9e1 *man/separate.dtplyr_step.Rd 35374ba871ecb6ea00de6dcf3d0ca5dd *man/slice.dtplyr_step.Rd a23ce121db07623a9369304a5e9e3d0b *man/summarise.dtplyr_step.Rd 636b4604554408b8818314f4bc8d208d *man/transmute.dtplyr_step.Rd 6773cea4a567ea405c039094895d9d36 *tests/testthat.R 5581f03f5184cd12c74dfb9163d9e420 *tests/testthat/_snaps/count.md f30f2837d5379019dc3c69a66c20fcca *tests/testthat/_snaps/step-call-pivot_longer.md 31200aff9a94ea5885b76b3726bacb9e *tests/testthat/_snaps/step-call-pivot_wider.md 873db1cb36f891dbd52bfca824ff6502 *tests/testthat/_snaps/step-call.md 2b240a0cd062ae4d0ee47227c71c9724 *tests/testthat/_snaps/step-colorder.md 4f2d93bad6d193520644642ccb1a0a0f *tests/testthat/_snaps/step-group.md 9cf7340c3a778076b6dc816f3e8c5fa2 *tests/testthat/_snaps/step-join.md 7f0768aba5757a42cc22f6bbe0257a37 *tests/testthat/_snaps/step-subset-filter.md 175709632a83cebe0e350501100ec8b3 *tests/testthat/_snaps/step-subset-select.md cdb92b5c6d6c37c75f3f287d9b63c03e *tests/testthat/_snaps/step-subset-separate.md e2909426de5f341f77fc29be6c9c5080 *tests/testthat/_snaps/step-subset-slice.md f7194b877f893f36592417374a8af98a *tests/testthat/_snaps/step-subset-summarise.md 5927e53329bcc9edca12225a9fc510a6 *tests/testthat/_snaps/step.md 759536262960fa9a9deac0862b85fe4c *tests/testthat/_snaps/tidyeval-across.md 3ecf6c21c6610e1869452fd1f0219aff *tests/testthat/_snaps/tidyeval.md dd4a5d5d818d5b0f1d1181ec34a7335c *tests/testthat/helpers-library.R 9f70e4f31ccd2202ac448b04dee0603f *tests/testthat/test-complete.R 18f04b14e7a7e8ed49a192ddc8baf0bb *tests/testthat/test-count.R 3d518026af991f3880a0df9868b1b5d2 *tests/testthat/test-fill.R d5bcb455cc02b8ce7e4e7eb3fec1291d *tests/testthat/test-replace_na.R 896e69cc5fe15eedf893940a28f330c0 *tests/testthat/test-step-call-pivot_longer.R 42f117d6ebf94ed014ba6e999b6ca6c9 *tests/testthat/test-step-call-pivot_wider.R a8851620377bb9703725daebc8590869 *tests/testthat/test-step-call.R b8077a08ee5289210390b8479ea1a2aa *tests/testthat/test-step-colorder-relocate.R 5aae80c3810d2ace501925eb133e990b *tests/testthat/test-step-colorder.R 1c46a397ef02fe401f24cb9b2f2d6a20 *tests/testthat/test-step-first.R 599b362e6504ac191d8c824b64d4e0eb 
*tests/testthat/test-step-group.R cdc24a0c9a8dd8e2b80158a8b150386c *tests/testthat/test-step-join.R f2f86385f70d3aadce0d720793136220 *tests/testthat/test-step-modify.R fcad6cd5a5fc9f97abab4c8f0e4107f5 *tests/testthat/test-step-mutate.R 40c5b5c47702627a4165ce1ba7b69a4a *tests/testthat/test-step-nest.R fa2312fbe6970143842a21d2689b79be *tests/testthat/test-step-set.R 597f910c0d2c8a064db303a02c1df244 *tests/testthat/test-step-subset-arrange.R d8f9c9ce077db1c71c7ca3b20346ffc0 *tests/testthat/test-step-subset-do.R 3e537d52c81de777013d7a7cd30c6a6d *tests/testthat/test-step-subset-expand.R d2adc52a47500cb857eb510f44dd0f9a *tests/testthat/test-step-subset-filter.R f5fb5dec3379f527540282f18e610281 *tests/testthat/test-step-subset-select.R 7aeb39fc9c54a113a957f99a3ca694b6 *tests/testthat/test-step-subset-separate.R 1a13de75c7489a7f06e0b0a1af76bb8e *tests/testthat/test-step-subset-slice.R 11c4bbecbdba5a1020653a94e2fa0f04 *tests/testthat/test-step-subset-summarise.R 07ad358b9d6ab2423b18f45868e0a256 *tests/testthat/test-step-subset-transmute.R b2760ad16fc8df31e108dd39d9179127 *tests/testthat/test-step-subset.R 49ea920ef6bd0676648afd02b7347d65 *tests/testthat/test-step.R ff2aeed1e0c55ed51a9b13a29915ccb0 *tests/testthat/test-tidyeval-across.R afdbc0c4fd13cb209212161738053fa4 *tests/testthat/test-tidyeval.R 1af93888fa1b8a19e2e82b13265f6f26 *vignettes/translation.Rmd dtplyr/inst/0000755000176200001440000000000014172104724012547 5ustar liggesusersdtplyr/inst/doc/0000755000176200001440000000000014172104724013314 5ustar liggesusersdtplyr/inst/doc/translation.R0000644000176200001440000001207414172104724016001 0ustar liggesusers## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message = FALSE--------------------------------------------------- library(dtplyr) library(data.table) library(dplyr) ## ----------------------------------------------------------------------------- df <- data.frame(a = 1:5, b = 1:5, c = 1:5, d = 1:5) dt <- lazy_dt(df) ## ----------------------------------------------------------------------------- dt ## ----------------------------------------------------------------------------- dt %>% show_query() ## ----------------------------------------------------------------------------- dt %>% arrange(a, b, c) %>% show_query() dt %>% filter(b == c) %>% show_query() dt %>% filter(b == c, c == d) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% select(a:b) %>% show_query() dt %>% summarise(a = mean(a)) %>% show_query() dt %>% transmute(a2 = a * 2) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% mutate(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% transmute(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% rename(x = a, y = b) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% distinct() %>% show_query() dt %>% distinct(a, b) %>% show_query() dt %>% distinct(a, b, .keep_all = TRUE) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% distinct(c = a + b) %>% 
show_query() dt %>% distinct(c = a + b, .keep_all = TRUE) %>% show_query() ## ----------------------------------------------------------------------------- dt2 <- lazy_dt(data.frame(a = 1)) dt %>% inner_join(dt2, by = "a") %>% show_query() dt %>% right_join(dt2, by = "a") %>% show_query() dt %>% left_join(dt2, by = "a") %>% show_query() dt %>% anti_join(dt2, by = "a") %>% show_query() ## ----------------------------------------------------------------------------- dt %>% full_join(dt2, by = "a") %>% show_query() ## ----------------------------------------------------------------------------- dt3 <- lazy_dt(data.frame(b = 1, a = 1)) dt %>% left_join(dt3, by = "a") %>% show_query() dt %>% full_join(dt3, by = "b") %>% show_query() ## ----------------------------------------------------------------------------- dt %>% semi_join(dt2, by = "a") %>% show_query() ## ----------------------------------------------------------------------------- dt %>% intersect(dt2) %>% show_query() dt %>% setdiff(dt2) %>% show_query() dt %>% union(dt2) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% group_by(a, arrange = FALSE) %>% summarise(b = mean(b)) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% group_by(a) %>% filter(b < mean(b)) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% filter(a == 1) %>% select(-a) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% group_by(a) %>% filter(b < mean(b)) %>% summarise(c = max(c)) %>% show_query() ## ----------------------------------------------------------------------------- dt3 <- lazy_dt(data.frame(x = 1, y = 2)) dt4 <- lazy_dt(data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 7)) dt3 %>% left_join(dt4) %>% select(x, a:c) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% select(X = a, Y = b) %>% filter(X == 1) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% filter(a == 1) %>% mutate(b2 = b * 2) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ## ----------------------------------------------------------------------------- dt %>% filter(x == 1) %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ## ----------------------------------------------------------------------------- dt2 <- data.table(a = 1:10) dt_inplace <- lazy_dt(dt2, immutable = FALSE) dt_inplace %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ## ----------------------------------------------------------------------------- bench::mark( filter = dt %>% filter(a == b, c == d), mutate = dt %>% mutate(a = a * 2, a4 = a2 * 2, a8 = a4 * 2) %>% show_query(), summarise = dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query(), check = FALSE )[1:6] dtplyr/inst/doc/translation.Rmd0000644000176200001440000001764414126601265016333 0ustar liggesusers--- title: "Translation" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{translation} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ## Introduction This vignette shows 
the details of how dtplyr translates dplyr expressions into the equivalent [data.table](http://r-datatable.com/) code. If you see places where you think I could generate better data.table code, please [let me know](https://github.com/tidyverse/dtplyr/issues)! This document assumes that you're familiar with the basics of data.table; if you're not, I recommend starting at `vignette("datatable-intro.html")`. ```{r setup, message = FALSE} library(dtplyr) library(data.table) library(dplyr) ``` ## The basics To get started, I'll create a simple lazy table with `lazy_dt()`: ```{r} df <- data.frame(a = 1:5, b = 1:5, c = 1:5, d = 1:5) dt <- lazy_dt(df) ``` The actual data doesn't matter here since we're just looking at the translation. When you print a lazy frame, it tells you that it's a local data table with five rows and four columns. It also prints the call that dtplyr will evaluate when we execute the lazy table. In this case it's very simple: ```{r} dt ``` If we just want to see the generated code, you can use `show_query()`. I'll use that a lot in this vignette. ```{r} dt %>% show_query() ``` ## Simple verbs Many dplyr verbs have a straightforward translation to either the `i` or `j` component of `[.data.table`. ### `filter()` and `arrange()` `filter()` and `arrange()` become elements of `i`: ```{r} dt %>% arrange(a, b, c) %>% show_query() dt %>% filter(b == c) %>% show_query() dt %>% filter(b == c, c == d) %>% show_query() ``` ### `select()`, `summarise()`, `transmute()` `select()`, `summarise()` and `transmute()` all become elements of `j`: ```{r} dt %>% select(a:b) %>% show_query() dt %>% summarise(a = mean(a)) %>% show_query() dt %>% transmute(a2 = a * 2) %>% show_query() ``` `mutate()` also uses the `j` component with data.table's special `:=` operator: ```{r} dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` Note that dtplyr will not modify the input data by default; see below for more details.
`mutate()` allows you to refer to variables that you just created using an "extended `j`" expression: ```{r} dt %>% mutate(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query() ``` `transmute()` works similarly: ```{r} dt %>% transmute(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query() ``` ## Other calls Other verbs require calls to other functions: ### `rename()` `rename()` uses `setnames()`: ```{r} dt %>% rename(x = a, y = b) %>% show_query() ``` ### `distinct()` `distinct()` uses `unique()`: ```{r} dt %>% distinct() %>% show_query() dt %>% distinct(a, b) %>% show_query() dt %>% distinct(a, b, .keep_all = TRUE) %>% show_query() ``` `distinct()` on a computed column uses an intermediate mutate: ```{r} dt %>% distinct(c = a + b) %>% show_query() dt %>% distinct(c = a + b, .keep_all = TRUE) %>% show_query() ``` ### Joins Most joins use the `[.data.table` equivalent: ```{r} dt2 <- lazy_dt(data.frame(a = 1)) dt %>% inner_join(dt2, by = "a") %>% show_query() dt %>% right_join(dt2, by = "a") %>% show_query() dt %>% left_join(dt2, by = "a") %>% show_query() dt %>% anti_join(dt2, by = "a") %>% show_query() ``` But `full_join()` uses `merge()` ```{r} dt %>% full_join(dt2, by = "a") %>% show_query() ``` In some cases, extra calls to `data.table::setcolorder()` and `data.table::setnames()` are required to ensure correct column order and names: ```{r} dt3 <- lazy_dt(data.frame(b = 1, a = 1)) dt %>% left_join(dt3, by = "a") %>% show_query() dt %>% full_join(dt3, by = "b") %>% show_query() ``` Semi-joins are a little more complex: ```{r} dt %>% semi_join(dt2, by = "a") %>% show_query() ``` ### Set operations Set operations use the fast data.table alternatives: ```{r} dt %>% intersect(dt2) %>% show_query() dt %>% setdiff(dt2) %>% show_query() dt %>% union(dt2) %>% show_query() ``` ## Grouping Just like in dplyr, `group_by()` doesn't do anything by itself, but instead modifies the operation of downstream verbs. This generally just involves using the `keyby` argument: ```{r} dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query() ``` You may use `by` instead of `keyby` if you set `arrange = FALSE`: ```{r} dt %>% group_by(a, arrange = FALSE) %>% summarise(b = mean(b)) %>% show_query() ``` Often, there won't be too much of a difference between these, but for larger grouped operations, the overhead of reordering data may become significant. In these situations, using `arrange = FALSE` becomes preferable. The primary exception is grouped `filter()`, which requires the use of `.I`: ```{r} dt %>% group_by(a) %>% filter(b < mean(b)) %>% show_query() ``` ## Combinations dtplyr tries to generate data.table code as close as possible to what you'd write by hand, as this tends to unlock data.table's tremendous speed. For example, if you `filter()` and then `select()`, dtplyr generates a single `[`: ```{r} dt %>% filter(a == 1) %>% select(-a) %>% show_query() ``` And similarly when combining filtering and summarising: ```{r} dt %>% group_by(a) %>% filter(b < mean(b)) %>% summarise(c = max(c)) %>% show_query() ``` This is particularly nice when joining two tables together because you can select variables after you have joined and data.table will only carry those into the join: ```{r} dt3 <- lazy_dt(data.frame(x = 1, y = 2)) dt4 <- lazy_dt(data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 7)) dt3 %>% left_join(dt4) %>% select(x, a:c) %>% show_query() ``` Note, however, that `select()`ing and then `filter()`ing must generate two separate calls to `[`, because data.table evaluates `i` before `j`.
```{r} dt %>% select(X = a, Y = b) %>% filter(X == 1) %>% show_query() ``` Similarly, a `filter()` and `mutate()` can't be combined because `dt[a == 1, b2 := b * 2]` would modify the selected rows in place: ```{r} dt %>% filter(a == 1) %>% mutate(b2 = b * 2) %>% show_query() ``` ## Copies By default dtplyr avoids mutating the input data, automatically creating a `copy()` if needed: ```{r} dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` Note that dtplyr does its best to avoid needless copies, so it won't explicitly copy if there's already an implicit copy produced by `[`, `head()`, `merge()` or similar: ```{r} dt %>% filter(x == 1) %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` You can choose to opt out of this copy, and take advantage of data.table's reference semantics (see `vignette("datatable-reference-semantics")` for more details). Do this by setting `immutable = FALSE` on construction: ```{r} dt2 <- data.table(a = 1:10) dt_inplace <- lazy_dt(dt2, immutable = FALSE) dt_inplace %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query() ``` ## Performance There are two components to the performance of dtplyr: how long it takes to generate the translation, and how well the translation performs. Given my explorations so far, I'm reasonably confident that we're generating high-quality data.table code, so most of the cost should be in the translation itself. The following code briefly explores the performance of a few different translations. A significant amount of work is done by the dplyr verbs, so we benchmark the whole process. ```{r} bench::mark( filter = dt %>% filter(a == b, c == d), mutate = dt %>% mutate(a = a * 2, a4 = a2 * 2, a8 = a4 * 2) %>% show_query(), summarise = dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query(), check = FALSE )[1:6] ``` These translations all take less than a millisecond, suggesting that the performance overhead of dtplyr should be negligible for realistic data sizes. Note that dtplyr run-time scales with the complexity of the pipeline, not the size of the data, so these timings should apply regardless of the size of the underlying data[^copy]. [^copy]: Unless a copy is performed. dtplyr/inst/doc/translation.html0000644000176200001440000013456514172104724016552 0ustar liggesusers Translation

Translation

Introduction

This vignette shows the details of how dtplyr translates dplyr expressions into the equivalent data.table code. If you see places where you think I could generate better data.table code, please let me know!

This document assumes that you’re familiar with the basics of data.table; if you’re not, I recommend starting at vignette("datatable-intro.html").

library(dtplyr)
library(data.table)
library(dplyr)

The basics

To get started, I’ll create a simple lazy table with lazy_dt():

df <- data.frame(a = 1:5, b = 1:5, c = 1:5, d = 1:5)
dt <- lazy_dt(df)

The actual data doesn’t matter here since we’re just looking at the translation.

When you print a lazy frame, it tells you that it’s a local data table with five rows and four columns. It also prints the call that dtplyr will evaluate when we execute the lazy table. In this case it’s very simple:

dt
#> Source: local data table [5 x 4]
#> Call:   `_DT1`
#> 
#>       a     b     c     d
#>   <int> <int> <int> <int>
#> 1     1     1     1     1
#> 2     2     2     2     2
#> 3     3     3     3     3
#> 4     4     4     4     4
#> 5     5     5     5     5
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results
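
For example, finishing this pipeline with as_tibble() (or collect(), as.data.table(), as.data.frame()) is what actually evaluates the generated data.table call; a minimal sketch:

dt %>% filter(b == c) %>% as_tibble()
# an ordinary tibble comes back; the translated call runs only at this point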

If we just want to see the generated code, you can use show_query(). I’ll use that a lot in this vignette.

dt %>% show_query()
#> `_DT1`

Simple verbs

Many dplyr verbs have a straightforward translation to either the i or j component of [.data.table.

filter() and arrange()

filter() and arrange() become elements of i:

dt %>% arrange(a, b, c) %>% show_query()
#> `_DT1`[order(a, b, c)]

dt %>% filter(b == c) %>% show_query()
#> `_DT1`[b == c]
dt %>% filter(b == c, c == d) %>% show_query()
#> `_DT1`[b == c & c == d]

select(), summarise(), transmute()

select(), summarise() and transmute() all become elements of j:

dt %>% select(a:b) %>% show_query()
#> `_DT1`[, .(a, b)]
dt %>% summarise(a = mean(a)) %>% show_query()
#> `_DT1`[, .(a = mean(a))]
dt %>% transmute(a2 = a * 2) %>% show_query()
#> `_DT1`[, .(a2 = a * 2)]

mutate() also uses the j component with data.table’s special := operator:

dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query()
#> copy(`_DT1`)[, `:=`(a2 = a * 2, b2 = b * 2)]

Note that dtplyr will not modify the input data by default; see below for more details.

mutate() allows you to refer to variables that you just created using an “extended j” expression:

dt %>% mutate(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query()
#> copy(`_DT1`)[, `:=`(c("a2", "b2", "a4"), {
#>     a2 <- a * 2
#>     b2 <- b * 2
#>     a4 <- a2 * 2
#>     .(a2, b2, a4)
#> })]

transmute() works similarly:

dt %>% transmute(a2 = a * 2, b2 = b * 2, a4 = a2 * 2) %>% show_query()
#> `_DT1`[, {
#>     a2 <- a * 2
#>     b2 <- b * 2
#>     a4 <- a2 * 2
#>     .(a2, b2, a4)
#> }]

Other calls

Other verbs require calls to other functions:

rename()

rename() uses setnames():

dt %>% rename(x = a, y = b) %>% show_query()
#> setnames(copy(`_DT1`), c("a", "b"), c("x", "y"))

distinct()

distinct() uses unique():

dt %>% distinct() %>% show_query()
#> unique(`_DT1`)
dt %>% distinct(a, b) %>% show_query()
#> unique(`_DT1`[, .(a, b)])
dt %>% distinct(a, b, .keep_all = TRUE) %>% show_query()
#> unique(`_DT1`, by = c("a", "b"))

distinct() on a computed column uses an intermediate mutate:

dt %>% distinct(c = a + b) %>% show_query()
#> unique(`_DT1`[, .(c = a + b)])
dt %>% distinct(c = a + b, .keep_all = TRUE) %>% show_query()
#> unique(copy(`_DT1`)[, `:=`(c = a + b)], by = "c")

Joins

Most joins use the [.data.table equivalent:

dt2 <- lazy_dt(data.frame(a = 1))

dt %>% inner_join(dt2, by = "a") %>% show_query()
#> `_DT1`[`_DT2`, on = .(a), nomatch = NULL, allow.cartesian = TRUE]
dt %>% right_join(dt2, by = "a") %>% show_query()
#> `_DT1`[`_DT2`, on = .(a), allow.cartesian = TRUE]
dt %>% left_join(dt2, by = "a") %>% show_query()
#> `_DT2`[`_DT1`, on = .(a), allow.cartesian = TRUE]
dt %>% anti_join(dt2, by = "a") %>% show_query()
#> `_DT1`[!`_DT2`, on = .(a)]

But full_join() uses merge()

dt %>% full_join(dt2, by = "a") %>% show_query()
#> merge(`_DT1`, `_DT2`, all = TRUE, by.x = "a", by.y = "a", allow.cartesian = TRUE)

In some cases, extra calls to data.table::setcolorder() and data.table::setnames() are required to ensure correct column order and names:

dt3 <- lazy_dt(data.frame(b = 1, a = 1))

dt %>% left_join(dt3, by = "a") %>% show_query()
#> setnames(setcolorder(`_DT3`[`_DT1`, on = .(a), allow.cartesian = TRUE], 
#>     c(2L, 3L, 4L, 5L, 1L)), c("i.b", "b"), c("b.x", "b.y"))
dt %>% full_join(dt3, by = "b") %>% show_query()
#> setcolorder(merge(`_DT1`, `_DT3`, all = TRUE, by.x = "b", by.y = "b", 
#>     allow.cartesian = TRUE), c(2L, 1L, 3L, 4L, 5L))

Semi-joins are a little more complex:

dt %>% semi_join(dt2, by = "a") %>% show_query()
#> `_DT1`[unique(`_DT1`[`_DT2`, which = TRUE, nomatch = NULL, on = .(a)])]

Set operations

Set operations use the fast data.table alternatives:

dt %>% intersect(dt2) %>% show_query()
#> fintersect(`_DT1`, `_DT2`)
dt %>% setdiff(dt2) %>% show_query()
#> fsetdiff(`_DT1`, `_DT2`)
dt %>% union(dt2) %>% show_query()
#> funion(`_DT1`, `_DT2`)

Grouping

Just like in dplyr, group_by() doesn’t do anything by itself, but instead modifies the operation of downstream verbs. This generally just involves using the keyby argument:

dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query()
#> `_DT1`[, .(b = mean(b)), keyby = .(a)]

You may use by instead of keyby if you set arrange = FALSE:

dt %>% group_by(a, arrange = FALSE) %>% summarise(b = mean(b)) %>% show_query()
#> `_DT1`[, .(b = mean(b)), by = .(a)]

Often, there won’t be too much of a difference between these, but for larger grouped operations, the overhead of reordering data may become significant. In these situations, using arrange = FALSE becomes preferable.
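
If you want to check whether that overhead matters for your own data, a comparison along these lines works; the table size and grouping structure below are invented purely for illustration, and the timings will depend on your machine:

n <- 1e7
big <- lazy_dt(data.table(g = sample(1e4, n, replace = TRUE), x = runif(n)))
bench::mark(
  keyby = big %>% group_by(g) %>% summarise(m = mean(x)) %>% as.data.table(),
  by    = big %>% group_by(g, arrange = FALSE) %>% summarise(m = mean(x)) %>% as.data.table(),
  check = FALSE  # same rows either way, but keyby sorts by g while by keeps appearance order
)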

The primary exception is grouped filter(), which requires the use of .I:

dt %>% group_by(a) %>% filter(b < mean(b)) %>% show_query()
#> `_DT1`[`_DT1`[, .I[b < mean(b)], by = .(a)]$V1]

Combinations

dtplyr tries to generate data.table code as close as possible to what you’d write by hand, as this tends to unlock data.table’s tremendous speed. For example, if you filter() and then select(), dtplyr generates a single [:

dt %>% 
  filter(a == 1) %>% 
  select(-a) %>% 
  show_query()
#> `_DT1`[a == 1, .(b, c, d)]

And similarly when combining filtering and summarising:

dt %>% 
  group_by(a) %>% 
  filter(b < mean(b)) %>% 
  summarise(c = max(c)) %>% 
  show_query()
#> `_DT1`[`_DT1`[, .I[b < mean(b)], by = .(a)]$V1, .(c = max(c)), 
#>     keyby = .(a)]

This is particularly nice when joining two tables together because you can select variables after you have joined and data.table will only carry those into the join:

dt3 <- lazy_dt(data.frame(x = 1, y = 2))
dt4 <- lazy_dt(data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 7))

dt3 %>% 
  left_join(dt4) %>% 
  select(x, a:c) %>% 
  show_query()
#> Joining, by = "x"
#> setcolorder(`_DT5`[`_DT4`, on = .(x), allow.cartesian = TRUE], 
#>     c(1L, 7L, 2L, 3L, 4L, 5L, 6L))[, .(x, a, b, c)]

Note, however, that select()ing and then filter()ing must generate two separate calls to [, because data.table evaluates i before j.

dt %>% 
  select(X = a, Y = b) %>% 
  filter(X == 1) %>% 
  show_query()
#> `_DT1`[, .(X = a, Y = b)][X == 1]

Similarly, a filter() and mutate() can’t be combined because dt[a == 1, b2 := b * 2] would modify the selected rows in place:

dt %>% 
  filter(a == 1) %>% 
  mutate(b2 = b * 2) %>% 
  show_query()
#> `_DT1`[a == 1][, `:=`(b2 = b * 2)]

Copies

By default dtplyr avoids mutating the input data, automatically creating a copy() if needed:

dt %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query()
#> copy(`_DT1`)[, `:=`(a2 = a * 2, b2 = b * 2)]

Note that dtplyr does its best to avoid needless copies, so it won’t explicitly copy if there’s already an implicit copy produced by [, head(), merge() or similar:

dt %>% 
  filter(x == 1) %>% 
  mutate(a2 = a * 2, b2 = b * 2) %>% 
  show_query()
#> `_DT1`[x == 1][, `:=`(a2 = a * 2, b2 = b * 2)]

You can choose to opt out of this copy, and take advantage of data.table’s reference semantics (see vignette("datatable-reference-semantics") for more details). Do this by setting immutable = FALSE on construction:

dt2 <- data.table(a = 1:10)

dt_inplace <- lazy_dt(dt2, immutable = FALSE)
dt_inplace %>% mutate(a2 = a * 2, b2 = b * 2) %>% show_query()
#> `_DT6`[, `:=`(a2 = a * 2, b2 = b * 2)]
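
Because dt_inplace wraps dt2 itself rather than a copy, computing a pipeline like this modifies dt2 by reference. A minimal sketch of the consequence (using only the a column, since dt2 has no b column):

dt_inplace %>% mutate(a2 = a * 2) %>% as.data.table()
names(dt2)
# dt2 should now contain a2 as well, because := updated it in place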

Performance

There are two components to the performance of dtplyr: how long it takes to generate the translation, and how well the translation performs. Given my explorations so far, I’m reasonably confident that we’re generating high-quality data.table code, so most of the cost should be in the translation itself.

The following code briefly explores the performance of a few different translations. A significant amount of work is done by the dplyr verbs, so we benchmark the whole process.

bench::mark(
  filter = dt %>% filter(a == b, c == d),
  mutate = dt %>% mutate(a = a * 2, a4 = a2 * 2, a8 = a4 * 2) %>% show_query(),
  summarise = dt %>% group_by(a) %>% summarise(b = mean(b)) %>% show_query(),
  check = FALSE
)[1:6]
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 filter        391µs    410µs     2382.        0B     35.3
#> 2 mutate        676µs    717µs     1350.        0B     35.2
#> 3 summarise     428µs    449µs     2168.        0B     32.3

These translations all take less than a millisecond, suggesting that the performance overhead of dtplyr should be negligible for realistic data sizes. Note that dtplyr run-time scales with the complexity of the pipeline, not the size of the data, so these timings should apply regardless of the size of the underlying data1.
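
One way to see this is to translate the same pipeline against a small and a large table; the sizes below are arbitrary and the exact timings will vary, but the two runs should be similar:

small <- lazy_dt(data.table(a = 1:10, b = 1:10))
large <- lazy_dt(data.table(a = 1:1e7, b = 1:1e7))
bench::mark(
  small = small %>% filter(a == b) %>% show_query(),
  large = large %>% filter(a == b) %>% show_query(),
  check = FALSE  # the two results reference different tables, so they are not identical
)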


  1. Unless a copy is performed.