dplyr/0000755000176200001440000000000014525714672011421 5ustar liggesusersdplyr/NAMESPACE0000644000176200001440000003000414406402754012626 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("$<-",grouped_df) S3method("[",fun_list) S3method("[",grouped_df) S3method("[",rowwise_df) S3method("[<-",grouped_df) S3method("[<-",rowwise_df) S3method("[[<-",grouped_df) S3method("names<-",grouped_df) S3method("names<-",rowwise_df) S3method(add_count,data.frame) S3method(add_count,default) S3method(anti_join,data.frame) S3method(arrange,data.frame) S3method(arrange_,data.frame) S3method(arrange_,tbl_df) S3method(as.data.frame,grouped_df) S3method(as.tbl,data.frame) S3method(as.tbl,tbl) S3method(as_join_by,character) S3method(as_join_by,default) S3method(as_join_by,dplyr_join_by) S3method(as_join_by,list) S3method(as_tibble,grouped_df) S3method(as_tibble,rowwise_df) S3method(auto_copy,data.frame) S3method(cbind,grouped_df) S3method(collapse,data.frame) S3method(collect,data.frame) S3method(common_by,"NULL") S3method(common_by,character) S3method(common_by,default) S3method(common_by,list) S3method(compute,data.frame) S3method(copy_to,DBIConnection) S3method(copy_to,src_local) S3method(count,data.frame) S3method(cross_join,data.frame) S3method(distinct,data.frame) S3method(distinct_,data.frame) S3method(distinct_,grouped_df) S3method(distinct_,tbl_df) S3method(do,"NULL") S3method(do,data.frame) S3method(do,grouped_df) S3method(do,rowwise_df) S3method(do_,"NULL") S3method(do_,data.frame) S3method(do_,grouped_df) S3method(do_,rowwise_df) S3method(dplyr_col_modify,data.frame) S3method(dplyr_col_modify,grouped_df) S3method(dplyr_col_modify,rowwise_df) S3method(dplyr_reconstruct,data.frame) S3method(dplyr_reconstruct,grouped_df) S3method(dplyr_reconstruct,rowwise_df) S3method(dplyr_row_slice,data.frame) S3method(dplyr_row_slice,grouped_df) S3method(dplyr_row_slice,rowwise_df) S3method(filter,data.frame) S3method(filter,ts) S3method(filter_,data.frame) 
S3method(filter_,tbl_df) S3method(filter_bullets,"dplyr:::filter_incompatible_size") S3method(filter_bullets,"dplyr:::filter_incompatible_type") S3method(format,src_local) S3method(full_join,data.frame) S3method(group_by,data.frame) S3method(group_by_,data.frame) S3method(group_by_,rowwise_df) S3method(group_by_drop_default,default) S3method(group_by_drop_default,grouped_df) S3method(group_data,data.frame) S3method(group_data,grouped_df) S3method(group_data,rowwise_df) S3method(group_data,tbl_df) S3method(group_indices,data.frame) S3method(group_indices_,data.frame) S3method(group_indices_,grouped_df) S3method(group_indices_,rowwise_df) S3method(group_keys,data.frame) S3method(group_map,data.frame) S3method(group_modify,data.frame) S3method(group_modify,grouped_df) S3method(group_nest,data.frame) S3method(group_nest,grouped_df) S3method(group_size,data.frame) S3method(group_split,data.frame) S3method(group_split,grouped_df) S3method(group_split,rowwise_df) S3method(group_trim,data.frame) S3method(group_trim,grouped_df) S3method(group_vars,data.frame) S3method(groups,data.frame) S3method(inner_join,data.frame) S3method(intersect,data.frame) S3method(left_join,data.frame) S3method(mutate,data.frame) S3method(mutate_,data.frame) S3method(mutate_,tbl_df) S3method(mutate_bullets,"dplyr:::error_incompatible_combine") S3method(mutate_bullets,"dplyr:::mutate_constant_recycle_error") S3method(mutate_bullets,"dplyr:::mutate_incompatible_size") S3method(mutate_bullets,"dplyr:::mutate_mixed_null") S3method(mutate_bullets,"dplyr:::mutate_not_vector") S3method(n_groups,data.frame) S3method(nest_by,data.frame) S3method(nest_by,grouped_df) S3method(nest_join,data.frame) S3method(print,all_vars) S3method(print,any_vars) S3method(print,dplyr_join_by) S3method(print,dplyr_sel_vars) S3method(print,fun_list) S3method(print,last_dplyr_warnings) S3method(print,src) S3method(pull,data.frame) S3method(rbind,grouped_df) S3method(recode,character) S3method(recode,factor) 
S3method(recode,numeric) S3method(reframe,data.frame) S3method(relocate,data.frame) S3method(rename,data.frame) S3method(rename_,data.frame) S3method(rename_,grouped_df) S3method(rename_with,data.frame) S3method(right_join,data.frame) S3method(rows_append,data.frame) S3method(rows_delete,data.frame) S3method(rows_insert,data.frame) S3method(rows_patch,data.frame) S3method(rows_update,data.frame) S3method(rows_upsert,data.frame) S3method(rowwise,data.frame) S3method(rowwise,grouped_df) S3method(same_src,data.frame) S3method(sample_frac,data.frame) S3method(sample_frac,default) S3method(sample_n,data.frame) S3method(sample_n,default) S3method(select,data.frame) S3method(select,list) S3method(select_,data.frame) S3method(select_,grouped_df) S3method(semi_join,data.frame) S3method(setdiff,data.frame) S3method(setequal,data.frame) S3method(slice,data.frame) S3method(slice_,data.frame) S3method(slice_,tbl_df) S3method(slice_head,data.frame) S3method(slice_max,data.frame) S3method(slice_min,data.frame) S3method(slice_sample,data.frame) S3method(slice_tail,data.frame) S3method(src_tbls,src_local) S3method(summarise,data.frame) S3method(summarise,grouped_df) S3method(summarise,rowwise_df) S3method(summarise_,data.frame) S3method(summarise_,tbl_df) S3method(summarise_bullets,"dplyr:::summarise_incompatible_size") S3method(summarise_bullets,"dplyr:::summarise_mixed_null") S3method(summarise_bullets,"dplyr:::summarise_unsupported_type") S3method(symdiff,data.frame) S3method(symdiff,default) S3method(tally,data.frame) S3method(tbl,DBIConnection) S3method(tbl,src_local) S3method(tbl_ptype,default) S3method(tbl_sum,grouped_df) S3method(tbl_sum,rowwise_df) S3method(tbl_vars,data.frame) S3method(transmute,data.frame) S3method(transmute_,data.frame) S3method(ungroup,data.frame) S3method(ungroup,grouped_df) S3method(ungroup,rowwise_df) S3method(union,data.frame) S3method(union_all,data.frame) S3method(union_all,default) export("%>%") export(.data) export(across) export(add_count) 
export(add_count_) export(add_row) export(add_rownames) export(add_tally) export(add_tally_) export(all_equal) export(all_of) export(all_vars) export(anti_join) export(any_of) export(any_vars) export(arrange) export(arrange_) export(arrange_all) export(arrange_at) export(arrange_if) export(as.tbl) export(as_data_frame) export(as_label) export(as_tibble) export(auto_copy) export(bench_tbls) export(between) export(bind_cols) export(bind_rows) export(c_across) export(case_match) export(case_when) export(changes) export(check_dbplyr) export(coalesce) export(collapse) export(collect) export(combine) export(common_by) export(compare_tbls) export(compare_tbls2) export(compute) export(consecutive_id) export(contains) export(copy_to) export(count) export(count_) export(cross_join) export(cumall) export(cumany) export(cume_dist) export(cummean) export(cur_column) export(cur_data) export(cur_data_all) export(cur_group) export(cur_group_id) export(cur_group_rows) export(current_vars) export(data_frame) export(db_analyze) export(db_begin) export(db_commit) export(db_create_index) export(db_create_indexes) export(db_create_table) export(db_data_type) export(db_desc) export(db_drop_table) export(db_explain) export(db_has_table) export(db_insert_into) export(db_list_tables) export(db_query_fields) export(db_query_rows) export(db_rollback) export(db_save_query) export(db_write_table) export(dense_rank) export(desc) export(dim_desc) export(distinct) export(distinct_) export(distinct_all) export(distinct_at) export(distinct_if) export(distinct_prepare) export(do) export(do_) export(dplyr_col_modify) export(dplyr_reconstruct) export(dplyr_row_slice) export(ends_with) export(enexpr) export(enexprs) export(enquo) export(enquos) export(ensym) export(ensyms) export(eval_tbls) export(eval_tbls2) export(everything) export(explain) export(expr) export(failwith) export(filter) export(filter_) export(filter_all) export(filter_at) export(filter_if) export(first) export(full_join) export(funs) 
export(funs_) export(glimpse) export(group_by) export(group_by_) export(group_by_all) export(group_by_at) export(group_by_drop_default) export(group_by_if) export(group_by_prepare) export(group_cols) export(group_data) export(group_indices) export(group_indices_) export(group_keys) export(group_map) export(group_modify) export(group_nest) export(group_rows) export(group_size) export(group_split) export(group_trim) export(group_vars) export(group_walk) export(grouped_df) export(groups) export(id) export(ident) export(if_all) export(if_any) export(if_else) export(inner_join) export(intersect) export(is.grouped_df) export(is.src) export(is.tbl) export(is_grouped_df) export(join_by) export(lag) export(last) export(last_col) export(last_dplyr_warnings) export(lead) export(left_join) export(location) export(lst) export(make_tbl) export(matches) export(min_rank) export(mutate) export(mutate_) export(mutate_all) export(mutate_at) export(mutate_each) export(mutate_each_) export(mutate_if) export(n) export(n_distinct) export(n_groups) export(na_if) export(near) export(nest_by) export(nest_join) export(new_grouped_df) export(new_rowwise_df) export(nth) export(ntile) export(num_range) export(one_of) export(order_by) export(percent_rank) export(pick) export(progress_estimated) export(pull) export(quo) export(quo_name) export(quos) export(recode) export(recode_factor) export(reframe) export(relocate) export(rename) export(rename_) export(rename_all) export(rename_at) export(rename_if) export(rename_vars) export(rename_vars_) export(rename_with) export(right_join) export(row_number) export(rows_append) export(rows_delete) export(rows_insert) export(rows_patch) export(rows_update) export(rows_upsert) export(rowwise) export(same_src) export(sample_frac) export(sample_n) export(select) export(select_) export(select_all) export(select_at) export(select_if) export(select_var) export(select_vars) export(select_vars_) export(semi_join) export(setdiff) export(setequal) export(show_query) 
export(slice) export(slice_) export(slice_head) export(slice_max) export(slice_min) export(slice_sample) export(slice_tail) export(sql) export(sql_escape_ident) export(sql_escape_string) export(sql_join) export(sql_select) export(sql_semi_join) export(sql_set_op) export(sql_subquery) export(sql_translate_env) export(src) export(src_df) export(src_local) export(src_mysql) export(src_postgres) export(src_sqlite) export(src_tbls) export(starts_with) export(summarise) export(summarise_) export(summarise_all) export(summarise_at) export(summarise_each) export(summarise_each_) export(summarise_if) export(summarize) export(summarize_) export(summarize_all) export(summarize_at) export(summarize_each) export(summarize_each_) export(summarize_if) export(sym) export(symdiff) export(syms) export(tally) export(tally_) export(tbl) export(tbl_df) export(tbl_nongroup_vars) export(tbl_ptype) export(tbl_vars) export(tibble) export(top_frac) export(top_n) export(transmute) export(transmute_) export(transmute_all) export(transmute_at) export(transmute_if) export(tribble) export(type_sum) export(ungroup) export(union) export(union_all) export(validate_grouped_df) export(validate_rowwise_df) export(vars) export(where) export(with_groups) export(with_order) export(wrap_dbplyr_obj) import(rlang) import(vctrs, except = data_frame) importFrom(R6,R6Class) importFrom(generics,intersect) importFrom(generics,setdiff) importFrom(generics,setequal) importFrom(generics,union) importFrom(glue,glue) importFrom(glue,glue_collapse) importFrom(glue,glue_data) importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") importFrom(methods,setOldClass) importFrom(pillar,glimpse) importFrom(pillar,tbl_sum) importFrom(pillar,type_sum) importFrom(stats,setNames) importFrom(stats,update) importFrom(tibble,add_row) importFrom(tibble,as_data_frame) importFrom(tibble,as_tibble) importFrom(tibble,data_frame) importFrom(tibble,is_tibble) importFrom(tibble,lst) importFrom(tibble,new_tibble) 
importFrom(tibble,tibble) importFrom(tibble,tribble) importFrom(tibble,view) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) importFrom(tidyselect,contains) importFrom(tidyselect,ends_with) importFrom(tidyselect,everything) importFrom(tidyselect,last_col) importFrom(tidyselect,matches) importFrom(tidyselect,num_range) importFrom(tidyselect,one_of) importFrom(tidyselect,starts_with) importFrom(tidyselect,where) importFrom(utils,head) importFrom(utils,tail) useDynLib(dplyr, .registration = TRUE) dplyr/LICENSE0000644000176200001440000000005314406402754012415 0ustar liggesusersYEAR: 2023 COPYRIGHT HOLDER: dplyr authors dplyr/README.md0000644000176200001440000001543514525503021012670 0ustar liggesusers # dplyr [![CRAN status](https://www.r-pkg.org/badges/version/dplyr)](https://cran.r-project.org/package=dplyr) [![R-CMD-check](https://github.com/tidyverse/dplyr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/dplyr/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/tidyverse/dplyr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dplyr?branch=main) ## Overview dplyr is a grammar of data manipulation, providing a consistent set of verbs that help you solve the most common data manipulation challenges: - `mutate()` adds new variables that are functions of existing variables - `select()` picks variables based on their names. - `filter()` picks cases based on their values. - `summarise()` reduces multiple values down to a single summary. - `arrange()` changes the ordering of the rows. These all combine naturally with `group_by()` which allows you to perform any operation “by group”. You can learn more about them in `vignette("dplyr")`. As well as these single-table verbs, dplyr also provides a variety of two-table verbs, which you can learn about in `vignette("two-table")`. 
If you are new to dplyr, the best place to start is the [data transformation chapter](https://r4ds.hadley.nz/data-transform) in R for Data Science. ## Backends In addition to data frames/tibbles, dplyr makes working with other computational backends accessible and efficient. Below is a list of alternative backends: - [arrow](https://arrow.apache.org/docs/r/) for larger-than-memory datasets, including on remote cloud storage like AWS S3, using the Apache Arrow C++ engine, [Acero](https://arrow.apache.org/docs/cpp/streaming_execution.html). - [dtplyr](https://dtplyr.tidyverse.org/) for large, in-memory datasets. Translates your dplyr code to high performance [data.table](https://rdatatable.gitlab.io/data.table/) code. - [dbplyr](https://dbplyr.tidyverse.org/) for data stored in a relational database. Translates your dplyr code to SQL. - [duckplyr](https://duckdblabs.github.io/duckplyr/) for using [duckdb](https://duckdb.org) on large, in-memory datasets with zero extra copies. Translates your dplyr code to high performance duckdb queries with an automatic R fallback when translation isn’t possible. - [duckdb](https://duckdb.org/docs/api/r) for large datasets that are still small enough to fit on your computer. - [sparklyr](https://spark.rstudio.com) for very large datasets stored in [Apache Spark](https://spark.apache.org). ## Installation ``` r # The easiest way to get dplyr is to install the whole tidyverse: install.packages("tidyverse") # Alternatively, install just dplyr: install.packages("dplyr") ``` ### Development version To get a bug fix or to use a feature from the development version, you can install the development version of dplyr from GitHub. 
``` r # install.packages("pak") pak::pak("tidyverse/dplyr") ``` ## Cheat Sheet ## Usage ``` r library(dplyr) starwars %>% filter(species == "Droid") #> # A tibble: 6 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 C-3PO 167 75 gold yellow 112 none masculi… #> 2 R2-D2 96 32 white, blue red 33 none masculi… #> 3 R5-D4 97 32 white, red red NA none masculi… #> 4 IG-88 200 140 none metal red 15 none masculi… #> 5 R4-P17 96 NA none silver, red red, blue NA none feminine #> # ℹ 1 more row #> # ℹ 5 more variables: homeworld , species , films , #> # vehicles , starships starwars %>% select(name, ends_with("color")) #> # A tibble: 87 × 4 #> name hair_color skin_color eye_color #> #> 1 Luke Skywalker blond fair blue #> 2 C-3PO gold yellow #> 3 R2-D2 white, blue red #> 4 Darth Vader none white yellow #> 5 Leia Organa brown light brown #> # ℹ 82 more rows starwars %>% mutate(name, bmi = mass / ((height / 100) ^ 2)) %>% select(name:mass, bmi) #> # A tibble: 87 × 4 #> name height mass bmi #> #> 1 Luke Skywalker 172 77 26.0 #> 2 C-3PO 167 75 26.9 #> 3 R2-D2 96 32 34.7 #> 4 Darth Vader 202 136 33.3 #> 5 Leia Organa 150 49 21.8 #> # ℹ 82 more rows starwars %>% arrange(desc(mass)) #> # A tibble: 87 × 14 #> name height mass hair_color skin_color eye_color birth_year sex gender #> #> 1 Jabba De… 175 1358 green-tan… orange 600 herm… mascu… #> 2 Grievous 216 159 none brown, wh… green, y… NA male mascu… #> 3 IG-88 200 140 none metal red 15 none mascu… #> 4 Darth Va… 202 136 none white yellow 41.9 male mascu… #> 5 Tarfful 234 136 brown brown blue NA male mascu… #> # ℹ 82 more rows #> # ℹ 5 more variables: homeworld , species , films , #> # vehicles , starships starwars %>% group_by(species) %>% summarise( n = n(), mass = mean(mass, na.rm = TRUE) ) %>% filter( n > 1, mass > 50 ) #> # A tibble: 9 × 3 #> species n mass #> #> 1 Droid 6 69.8 #> 2 Gungan 3 74 #> 3 Human 35 81.3 #> 4 Kaminoan 2 88 #> 5 Mirialan 2 53.1 #> # ℹ 4 more rows ``` ## Getting help 
If you encounter a clear bug, please file an issue with a minimal reproducible example on [GitHub](https://github.com/tidyverse/dplyr/issues). For questions and other discussion, please use [community.rstudio.com](https://community.rstudio.com/) or the [manipulatr mailing list](https://groups.google.com/d/forum/manipulatr). ------------------------------------------------------------------------ Please note that this project is released with a [Contributor Code of Conduct](https://dplyr.tidyverse.org/CODE_OF_CONDUCT). By participating in this project you agree to abide by its terms. dplyr/data/0000755000176200001440000000000014525503021012312 5ustar liggesusersdplyr/data/starwars.rda0000644000176200001440000000656314525503021014662 0ustar liggesusersBZh91AY&SYg@?$2[W@FH}Ӷkg@wt ^RPo`˜jz F#dzC'MG4dф&F4򍩴5SA#IS"4 @4 Bde2@ 4i4iIS#JMCC@A h 424d0M1#MAbd!`L`( Ȍ&CM4 hS=F4#Fx@1_?biES!qH'GHǿRcre5xAi2Asg N5_C$"CI (!K$'kBSHB@@<<U~eԔ@ժ<]B qzk.E*-=.11p ``Idt%WT˜["+6iu46HCNH6Ă\I2@lbOz{[w өJjjj38|~wիoxuވ G%p_z92o $$tE~6dPSKh댑0LWC9qt 7JFHt$&޾yIYmFT8M=?Fu5uJYy q‚r1T3'Y0vy7JţM-(s4Lf`(Ѧ*Sf3aP AIHBI0>d@` @ ̐!< C$0ѐ7DX)"@PEł"XbX,H2@F1 HO >/ V}F{^@;H!|jt t2U<{2h@ @MjiZt)08!& *e/ɚk$`)HUt%k r`{Pb)@F O@.UiN-J_qK*TZ8Xƕ*Vn~@ra~BU#Lfzj CL._!Uer&Qt]]1={P`CYTR%$V՞gZkFc$*@d Q`F;')CI18[OQ9EAݦ6@T<ܑ`]U@&W4j2>- %`2"!t T)3A搀3vB qwq D= aO* 8$PKzZ1De) $xhVBL>ipKS}Ow. 
!L'1,Z/L@Yx=$4+@Ec.U#(<9rn$gf6bH U#^]6]֕!vg.ݹXؘƧ(El]+W/"(P E2I{AGM[˸dt 0mhpE,,=p`=*H F,UV+|6vl_fՆ*X mh-2[r @B 㧓FqO4;hk(N"Zܒ/PEEBCJ-WLQ8:+/E 1g-L n'd51H,:2YY:Ә[PQ2`ESB s a&M4)׳_C:1U$XVE3Y ~'n--@T2.^in#tF;H7ь 3Q a$`cl&MmZ M6d交7YS{iEA I Hjhd\V "7UBDu,H ] 3ȆD$x3T]}xtHCs,;$vؒgN"9%:ܻA]l*ʑbWfiZe6RYtjƒM͝3-}U_ZbXc::N89Jk)d`if1vr/YnCC!4~B!XRIZ5(eܺv$QI$ARҖ g|4ko@ %IێK*C.(E )UPoNrƒW#[j{#) %aH$!sWEuExn1⫪o@jfoxl\lri5xQlbJdDeQ YQk@@Eķx%7|5qY,>*ΟdIpBS4G5M=/Sk@Ym]TB]`m0Z~LMK,؀u#NUrtHc`k:bEm$w%(`ԧd|2cI7 n}5tdNK==6 ƭE%`҅PV0w`tt^ <̩A@ 4 AJ#ljHx9Њ_i{PG ))„Th@dplyr/data/band_instruments2.rda0000644000176200001440000000032213663216626016456 0ustar liggesusersBZh91AY&SYH,s2hP@ޠ@U?M=#b1iƚ 2h 414`R' 4 " WEU2؋%aX+}KIm~cu̥P A#8L}6yE*1˘(kK7!F)7UրIz!/) (RȲ }MV ث:ʻO=‘M2(4&, oH)&Z X5? Z"(H8[Ԁdplyr/data/storms.rda0000644000176200001440000021257214525503021014342 0ustar liggesusersBZh91AY&SY'|EDLq A$EDLd`8E+}Ro{Ǽw*Z+t]kݫR ;mӶ]kZe[[k$E6w7osv6ś[tH**UTPR qUWx(T ERQHT"Zh$UUDvj*%(E$%.u=7QЯ@'})LݽmZDEHSBP @%P @I@NuD )W{NpBQ@J /|( P*PJYP{}g_G@:K^P!>꯾}!^IϚ=:Cm!0L4)M)?(mCm__L|/^r&j'&%0Lą(L,WYW4m&Nl?6~s"گ8O:6:izsl'Zl2&T1ޭ֒y|%x/>~>|Oy׌^3<}|3iq} uo~Swwݍ+i]\­lR؛mE1[lQ96SEmR~W2+iuYm2NdwKkVӭIͰlS3dy5@{rS!.doZmh<쏧0(YޔشC}@n'l5OhÄFyt7yyĻc+;#6;جD xt(%ʉ >`,ȡL%5V6hQ"H6xt*:dާ`8':z6=de70󞷬Zyʜ͒mQޫN:=cօydm/Z^W8/:*u9wZW=imN`l9QVBwmG1KbYxǭC\]h W9u5\ɵEP{laxüNSuu\'0u\ҹ֗Z.aue>mG7#J R xǰiw˙q׷',[]e| dbte{#*aQ#H'(>Nke&꿛̾}O~퍭uj"j!mڶ[KcXW_q{hmǶU:z{kb^0~%/flO/9y@۶\; soq =g_{s=ca6MeU/NEͱ^ӕuEw]^8__9TSۚ6͓Y$;b5EQ'mbذ/#'rbQ{}[V1>ޟ.UͳO9VXT l"fM}m_,=ONyl>|GywP捭& F5~nUZ*6") +k[_*#3h|Wρ>}?d^ѣӫ㋾[僬RH_^VOl0I} 5EE+[l"j9ISEk^e",BJٚ9i|rxi^4|ObI2h=헶KawzUZ^vKIɲ[U-s |gєhy~<}[r͇ ר}Xf=myC4֮6ղlm1}i+W9h4[N޾EQ>y}yWO|IB7W mrEmd8Su~MW)[֋֭=؞0>M>Wl^q,Rm>4[tرTZ5mscj/V)̽W_^y⯛x{52=h>_F{( bl>:wyv[-_S{d/:F^'ےjlȽ|o>_/. 
.o6/~llJRj1WƢp.6I.q*{pU)oN:.bK9ca!!PO{F6٩&Sh4Wz^-)l_V:-[ N^a;@c`KXf&Lh4m+i6YsWkҷ*ȹ`uݯnz^r_>cm<DQj5o/]mbhj'Քc%|N^m'4wZK||=x>l[[jJMdٵ[R>uJiFOFI=Z>.%o:NhOSx?:K!bh#jѓYVnazJkf_O8l-:&^ߤz{U̕;dB Q[16/~/>|]h0tKĜ (|x/H5^4\vתڞdǣkii-+|JFڹ^|s>xԹQCm^_B| ,҉2krV*) >^ʪN*q9i$ihiLCTŝ_j2h%$ѱ-ٴW ӝi:}<<]<66fה9[?u!J_JSHD*}G_US@MxN?ڭK=MzTb6G7R0Z׎j o%/gp8ʞ?lsASׄx[Ce6`8V{q;ЭWryi.(ګ:07-b/*lWZUMxUyx*-CЫԝu r|}N 酴jsI`=O.q+6 ڛ y쫊{ضz#6Ot+js\26jOSN Cے\[>|C+T .a^ڛPʛCkdCjMZ6+/ѲtŲ6 h6PBVŴW%߻w6us[EzW^ڻ؟&͖ĶS͑KU[V797*3#jCK!5n\M!\|09*7bGغ>mUהd;xp{J-o|K֪s Q`=ߊd 'nrm9hR9l.ԏsF^tu_Rj6QN;羽؏96&A/Ke%l&Trl.N[R֋:'|(}P'%mSzFҫ'ʌ+J}:XzK{IϜKOsSe=WÊ1NelѰW03IFMs9Bw:j&ͥmm%mE-{w\-C iN[Y^3*'rWdM&92 ot9Vt)CV流x"|>7`ȋj ի4m$]PE(D@/VXR`^T,0H* Vt"~荺d=>H>m+liV=\|Z'6&T/rWڛħYN&Q=VI$Vz[6lju]le6mhmRjV~\I~b&4 LT[ERhCURaDQBPU u?FlWKsr %nst-6 #6EJ\JD3NpZ0Œʂasf*DPчD,R .Hqn0$uKV)e h@m KR[Y$LJ+&2Xa-FNj̪ 9,"˻:+βnPN$AUit(4(qnn\\nvGY$t@D\u»91nr,-Kb٠J"W]8Bcrˠ !SE3#CLSKbȖH;spiq$Ν9tu%۫t fL"EDu  (ˤJ 2rKMV:'= ;98r C9\J&4S4JaDYZ"YmA"*L $I4M%(UNggCܹDWܧt닠]H-2%%-h4YLť]LT9sH54Z* U5U'ACJZc$倢XB"fDj'bDDv.:;ƅӭНqqM;s.KݮEri *I,jEfFY$Hbfmh,2 ӢsJE'9ÖRU3 brBN'JQU ]afuD$I:$7]I:P\39%1(L,,& &Z& lu9ݷ&8IPI ̻Νq%9.I7MӚ!!FJri](\0*sS n]Es]D5'rwrb)AK$K]t%783f-9+I3RW*ͫZ,Rs\A$e,K :̤9aQjJI"p+J3K%FHt#9]åMFY K jEKD*M9ʒ%abhdj)LЖ%ˮ\] rqejjf1D "+BLLR93aUteۣtq ݸ:i;QW''sCnH899]\dn D KDDDNG\&:2!abdFd"D55-5hi-D9Z)`j(ef$b2#3u\NLfw.7M:u˦\uN+ ΃rJӄYb'L]b ˲ήۺ'h‚C,:q*i5 Ie@\̛H"ԘP%Aq BA6E:;LU) V$hJ $7\V[-s:EV[ +Yt]stt.wvw[ڪJ*i"Ҍ!#J*%JYPW!S+T5Q14J$SA[PfS` O3 JRa!V&b]UB64[&fYas,֒ th(YGJ16\0SV dPQhjrRDM VG+K4U*b]PTȰꥴ4D!6  XZirs9vn4 STe $@ML+Q*-"BZIΜ;Lj 2ҔRSV*i-QLbBt:YY]sDKӻܸu;p 7HeԩBĢ40֪pjPRPB!T2R"[*YSQ" k;v ź#P%M* hYJ+ '\\݇2!%D+ fӗ9ۑ quYv4Дin`ps8$DA$hRۻ;Nt9..t:ī7]s6qӡW;;h*͘B(Ԋ˕!Ɉ\bAX]ήv:0";tuԝsQJrԺ ì5AR I;]]8G)bh.Gw-a5ٸU5NFt4a!NʶF% 2raw+;er0 2֌(+W&PwkdWNHÑ5%WMf;tWw@H\ ,5QCÁg, qea*N]rIÜwBTfYhF(Whv˝EdE`bk$A˚;ú]5̛V\JZLiАTEP0$-Թ52K*U ĢVDh9ĥDNYl$9]0 Z&`N)r8]݅;s$-dVȢbYҐVDajpwDZFI$g)$E:nrrssniWwKevH]Ód5e]M&jgQ32&,@"EV]]L]\)ήBYJLkTZΑ&rЪ \%r:ĺeDjTY[QVua"aS295kNfR,G2,-)HӢVM!uKI"JĩCSRK*]iu 
Y&يE,RQaCa)$E䄍FdF!J(0g,B#uĈ9vs9B 5VFF$bM:5аT8]VRԬgJFeu8tU$!s %s,[J(Y,S3D0rwA#u:&C5(;nu9"IuD,fgZ,5f)3 !#3)D(XBȹRUL:TFipB:VfRL.HZ D BVfRsKZI:'s]t:,wY'q1\ d]Pec *)8SD,Xj8wv9؝s]wDudz% PLt%R$5'"X KҒ%UANYQ. =J@u *F#%zM[N6juwޖ\VWW`YbƼ.HsPO$9b(^|~ qJ8KJ_O~9~\3F~;~oUk2K&_h,Qڳb՚kar\nLr*(濾{t"|eQDZhy  ]W_uu;ed$NG` g6Qۊv\8}rU:EqP a2orq#UzD1W#h-F&$J "{{s\Rh>n lG'"n+,8Z.-iGU/.H& F҅T|=υb+TMzխE{UR KWȾfѻBKކ= Co%uTT?e~7÷/|dT#!_FVPE]Udꮡu9.JȮ+_Lq9ΐKSꡔ~`~wF{xɎ6vэ^o'MY>J|ڛu1S'[tMӲ4whM‘;S>4('(rsVSkjQ̓嵹mb65H@i"QfNZe$b$ma,f0֨R-hPLb+19"$XeQYVQ!TPAb% J$R%QZ,1VtrHXgRjdET©BTTR%TEL#IQ L4Y!u귷q+DG_DZ??>=>|(pC:=o!cF~[Viͦc&ۄ‚n_?ȯ m&SKHwrD{w`PW d-A (;Z8~AZ9+b"ʠ{e8cHøH6LA?0Gv988휨ޣQ(wgs;(d;'Z= [bŵ ׇv_L›_)Uﴏ\ehs66'ݲ/4 ڶL4a+rL{sLьO;]ӵ9I`qv9(wW)%{z.0ČJ>ǸI|sjM;yq j˫פ!HQ4:Uo,Ƕ,=觇,CH9x>c{Ǜ6!Jś>'π=\YL/: cL!=VD0cw62^Pב_{Y,FyNJXusOkX5b51/ 5fLLg-3G . 9ul/[B ̪Qy;lJ*jIExS*bG8U?^ \J8MEOIW"-l+b[H[Rvv@i)җyx:Hw*rjM3"OPzv}'QTz#BW ;ԝ}C$"jO)bCUz$drDU^RڦL6(ڗdW£;5' ;R\BEv$xHa6J6ڳVEW T[~缅JH*%X}$*t*>D_-֛&ɲeLekQ|/J]=T^']Ub#+3SeZZfbc һIdq+*ֳV kX֦["\JQ\KSIO$ zwT[ k5fTxHtt'z+TWW{b)^ew6652DFD=۾1_**I|@a1SRK_[K?J?=q>~R>ҕO()?v/ʶ&_GTEOEU*<<:3.*^;tAVIvrJ4OU{ YxTQ{UN%~6{=ʶڗPG.rO>!yttt;'z/`'4wיO:;%SyU@{G_eSϻTׄwI׋v+JNWWTeS%yX'Ϥm)_6h+$PC-=G*(Y ߉_i }OƨJÛ"C*/^[_I.@mq+a߹N{K\+TTWE{x)~E*+>KJ HW6gC vOE~Q뒯}W> +&Z}ʎ*|jQ%~9˜jUQs"U9De2Mi52ɜ$ *A EU$, NuM 2bh9 !"a5\$Ŭim#*RթPr%4:-$鈅R!H-%fj:]Awqw iÙ*iZJs-YAZ%rPZifRVjEH[5B:""TD(ldDtЈ6!ՔS<S~gxWľϮ|/I|>}"TMC3h.mj>YCr9r`< ޟұx%ˊ6wbϏn O!m+b 2uS9d簘bob . 
J/2Nuw[~J<:Pvoй!z{q%m8wmɕöwuri $٭O5̗ZfXl/pz\_'c~Y~|sGk~=mj̭dx?qPrR &S(o3'K!5r-,h<]Fe&L4!J#ǍBaD ar#!PL!X˔+$A' %l-8$JVp^A9`r{tA FrMj-- S?VPdt4^ԃ^{w}Oߑ,WlV ;4|?4I :PjUAR'CwV@A]J ʣ޳kJS桨"= _U}%SBrS[q#8pw:_\KʗO) O:|#Ǻ2q8Tbȼ SzT:=蘢%M/zWTv4E^(>uh̥´UU8GDURzOj'Fl+?7/Qπ"`Wč~5{R(VPTm)_AL jKci[Bi+ rNj k,ήu6#sfhK"ԎuHQe٧z$|:,+!^$tװuWtUt_p7VLhŽ\UsEq_0]I JE"z%\FhCRNF#tTQ`pM\#lJ* q ~b]_ڧȽ?xAҢO$~—(tt<2 V»DϨʯp}^#Gd/xTBj.|+͇ VKQ$=WBUP!_`eSt >OQ*h.JTPqer$SJv CO*5 $=؇U%nYTeCO^j*}j'Ε>3p#@ Fc`}=*}$/qQޤt%CK"xA?bB9Wߡ"rW*p2/?_%~JtUzp{I?f,GƉ~lT~[Fj)Ig+"cz\XA`S;Uţ;6B p˶gGˑOǫǔo}I=,{wc!&+;&qF'{gxO $ڬ92(IWRI-|4׿>;jIT?y-M{],[ɽWas2nۚᤚ6ad%_0|16o@FL+6LX6#7Í1=#NhH'dS&l>涢@ThЍDVͱi571, mŜC IEljUQU\-2jϞmI$!\9UO:H(SX*ss)9.d9JS 9-TEۚ.,Zj˔U LĪAD[E"Š a* r5M"щs 9aZJAaRUudv]ӧ2rw.q ܸB515Rd("(BJ& Թ.g;ΚeD4EQa4S)kXbeR:&dI'xz}K澍{=ME3\C& ; "gI|k^?c?i!6bLf*-^F7)c>NX݄ YE3ѲLaϽڢ5b1m6V6#bRj5fK"2X$F5$f2-[6цF[-[aM(Dc"DK13c"HeSi$llS*L(HCJ4fRXC&iSi31 #e l$)Rd͙B1e$BRd P"c4$&̙1)f4ҙE%B,fh4BH254Q"#I2&6lRf4h ,Q)d$Q2L64H d`f",Y$)4R4c 3%"SP2&ʌ3ȣ(T"JT@,5$) 2MHQV,j,lK5,(ѢY-2FHZMF)˜̣bI6,͌fY b1&(EBĴl3cXQ1E`ؑ(TTVH6 $I)I 6AE goQV-9rhygݪQQآ!-yfŷHkma hcbgҪg.U.\l9p<]M:[w8FRV$I6S*e`Z1{rIWG$2ŤhiY-8(GüځvLEwӰHMN[LL61fPɎJD/pdCZP".Lp{]wETH|DPAZ0g;lhۻerf6R*6+775ن[pד @*𡤊-%_nmHXlc2Ŝ]SZÆsMZSMUY&Y_y0qF'E9.|/*cDiudn3|ǍBG׬{йN9Hiȑ;?Έ;CM 3j!?,숤:0Ӎ .2}eǛpLΓ'`@wX&B;s:w,8άH ؎;p8A`Th=ž};,oO@}jg{)A[z7|i{7Uy}]}-["&W-[&rrke[nFJSk3F#X-}LdR^]ݱD2R{etbh&Y,̶BWu\W9£v^-RGy.Έ(_܈&{M3G"QE zHTZ㠔p#B;V!;8Sy3{ӭ+r.Fyc ^s\ Z$'r(-cq1qIYar]U6kiKVVI QBrH@jHlLlm( (K;u8Nnbqhrs[(z]skMSrW4'@R5AA<765bz]حjݷuչ{q*O_lrْTÞV^[;wT+v[j+ZQlW)){Q\hqH.-UIm "D%j)u2T*S6<ݔUAE.7Uw>x:)ɻTɶQg[y-yEmbMbIb_х(qR QFP(ZD@Ƣۚ娣`\_-^Qd,Nn]#ҁXQ,YB1e[kTnk˓ט1QAbCFSFc AY,4\EE&bBAZE&t8˱8q8'm * vJLH+Raݫg8-˛aKwQl2mrܰłE]$K#8Q Dםں 0wLыDdMLM[D SbMPP#Kq72_?s_c }eOi=kی Z6k` WQiN'Gηy͗t՘o&zw}_4YPҰT|+k|DI:>,'AAJ/gS||4.iRba@~b(,AXcrڙ9Rvo2zEg$42>6L2 c/me#QƯ[߿Xfe=AH .'G泖 pN3w!OF>̺^P]ȈgF*lnGKݾ^ m;jpar\=!˔~c7Bc͙5"I=cl=#C_ҿ9n=l`g:ӀI'*O67+ O:&8<+?Gk4)hqrJ [*#ϣLHx=tɽIЍp؎^G?K:1I;taݏsvMѐCu}“qF/up}T}+6ED"@H 
?SW/o/[Hc7x|x}߭,75{gm/d}owӮzo;ko։{1þ=^Nb/{޻Tnk9n>eݕz@mϲm㭸c*f9ׇTӍV~_?/ӗ=vazyR uy ꅩX[OU  a>>J_O˷>Wa~qgu4L9˾O˕t0,:͗}ى庑}JDա]3v7x75㷒x6񍭝;t_4'Ae _K"q$xEQя ,)`r4i0cqnwN\Q2e'픍Xxsƨ5rۓy7 ~<(.&~oZ'LyR./p|QOII[vޝ| H 'zq y\ꞗ\¥tJݙY< 2!L1"ƨiԊj4Pb_vIj%34F h(Mc/_tƊQkݰ $k@IF$IlU(+sЪUEGA[<)):IDEw3qTe.ӹˤ' ELFEQI`ۚF@\np H@FLI lɎC6\rIIG8 :r -Zu$ej(kC OB*ΑM.PLr{qɗc *wee$bҡ9rervHx PpN:@]*50V9# Ӿz|TC'2sr,0sM@XEnknT 6x豆WZ6"f5@д4udq(dIXW+ Q+ɹ$)>u LG;{ d}&JAp)zhZh\)\첰 . GPBvS9Q\?wӁpv_i&ۼ*S"AT]!v#opNq:"ri}|ݼ|v[_n~~My޲h DyBqE8EEޭ*+#z 8Y AD(Zs&dHOH.hYpcL$"Lotѯ䢍Ȉ=ӥsWH ͼvt0ldfwWbR5+b-Q` :rʼ}~5=QW"Y#(k(q!8Z 1ӻ;¬N ~7_yؔ<\hTg!x'FDb@%L'p"d4@27k>mhi@589ӱ'/`^+DHb@"^gs9 FvA$ ٔ&$}|GD,.%^wҚNr|EJ+ /h׋^Cb\.3ً 7.˔U%BOtBUD}`sPK~ŧ(,^v& .G(=.*e܃y[d%Z7SpzICŷp+ g[DT$o󀸥g(n9:7(u ]>#Ǿ>N<EC>~ǔ !sz*Dy{E}! x>+pWweJ|~`q̼QwY媐Θɵ}+'M+5-#8XIt!` 61ۥ;gy3 ؟٥f.C"ZŕyQ@U_+.I4Jِ3RsnGȐ]p*sJA(\ $bVQ˭|r>Ѽ.k H{EG$Iq8),Tr+DOU1ytltGf}v"h'4\B)yQjAvh" x6 mK\HĿW $6- JDTFz)Zэpp 9vDz( qKEЫ*S "M$B<$gHr$>U|sN8#$AH!roW^sPO(rx<:DrPul'O"]'C|BM335E:Dγ(KiMkf1WCz2x1G›D*42R1N}n+vȠ'+ C %@|;hOi (5$*\<ȫpf$ӗ/<;/+**RjN~;"wβ!&yM/$jUPJ*؝Q Թ7'b`"Q_c)˜G9s|֕ea>HNVH:]̒Nevt%s9W7S8$:yxJpnDQωu= YG *Ed*sSR*W7ܻY NfQV߫`v a) HeL4@DIs! 
4Ԡ`оE4*}i=KRu&=[~8zwXxϧKճ+y511<`!N63%`&{+oyτX%#>t($8ۚf[A6.'̈́\ޜZ`V-"<L@ 1 H DУKQwV!QW愞$w?JBf]LY_9tG*._n" >NbJI I˧:tE]#YvQSˑ^F\Ԫr|` =yW*娮Ltag|Tr257п4["L{ 874R~nCY<_)|+ ֙2D“$޶MweAxy{FWӴ'U{f}=*1ip_Gp%\fI^z)^QTdUڜjQQ^Q9JVQQzY!BHj{qFJmrǽ9DZrt$u˥WD)!zgpKbӇp=}*|{kG GIDw؅|ɔȢA0Yc0+Ay Iј`]BVҞwHJb,Vju*>Ha^EsQwNIs]9N$Np5:,~D _F=iL}H/E}{t>񷉱:U(G r79a.9Ze¦Xk A SG4Hly܈PmQ %h}D/,4#M/ _x;^ ~8*VnLkצ 8%Pl@` ~)d׸NicRtZN%aXrJ*L:Խ"<}л &DBo-5 h}ܫV쨈Tz LJQ[,q/F_BG^ľB D{ >y"0YR'JxMСBŒL RWpB@Jgs /EtvFQ$l"=mEcXip)P +JkraIrpkznPH K|ze)d[=Dڇ,ljȠǏ]b9~q2$ycΐ<@Mh{6߄4ˍúB5K`ڵŜGRr;i0YEN[pFcA152km`>kM= בzi XǑEbbBo <cH{?5M1Z~>E"?8vgL񮯄2N5 Mgn FQp|;v/+#ϗ>>1@mဃM'^'ϣL33ࡀb=1lįPK">*rw͞epQWEb< f48']Tcq3pL)vBYR?޾c)ӓuẄ́&9,/PVT2t2B#DutP\*L?)Qv|:+6j0^(.35D)$LQE^hn ֢ 8O=FܚاM>E)%ʬAp[FQ]^LlsM]yOΖT'ӐMC/צ˅7d5x1ƄlߝU@o1{=Ru[;ɀ(ݸF3[b r\fƵ貺`$ <>q Aa-Q<&;O|SՔDQ^~8D&EHGw@«R+;hF"'҉HH&&E"P9KXt88O~Q!{*F^|&~N'G ͈#Ku3߿Ii5}m#H>LuA$H;I bNL xuO>E>#re!ƌ@c}DY1HɎCi*RAd.CRȃ nH JdDLfebz:y11??Bbpk%!E6c7H)Y R`XO?wOw!jŭycGyԮgt|x/3U4:41~J?ǹ@T@Gcy"_-H2,2 xR2LLm-<WtE"F7o=-o ,iNöpL򾞽{i6hڴ/4ԭgXQ7D*gyO(n7,Ë A ޘ8$f\;繑'nld)C3A«DլIe>B CYJ g#3yկ-FKAQpgRwa"Q8d ZԌ&gLu3S"}F+HCkE>TQfm ЄlۼHt .+v)KvYF$Ɂ.K~& PMM6]ZzHlџ{#kguD7FM N.D [].5$6ONpxWn~5%6]]56Y(,0X|mCӑKUYA[6G5+^^vi '5d^#ς4iw/?5/٥fIMni~-!GHv3hŋ=֙0[ʈFFe8R +%gCS&``p )9,EX\]HcjX&EdY*d05$;&2!M W"  $P ߉z0GWm83^AF{,bX ) gߩ6輎R*AH RfA;&ߴhBj8hĈ7v b29& 08YꐃLCjp~R~EN Կ{F[`qClc *͂jFKXYUN&3P$Z@`Zy diuĩٌ[Z^zs҇]σt*<7BLo0L0iK,;G2*xS1vv&OTYFcf@LFp4b6i A!!MDY VB4A K)9ɗ4R1Xb񤛵uٓ)&d l3JC)tw, 42B.g2fϻ~x87~JcQI ma̹R.` ua4D0a*~8ijC[ү=FGw 80UMIn+"4A@Y>¾ha {r%q2Uv$Wdښ2=un͈R;"CR&@Ƙ]  ˝ mm8Lp2E s8:־+lSm7ie< .|0YkFTld˼f3H2< GQTz3 7)K$&0f}R Y $Cf8X]LԆ2pN2`6s@D-bo12B%" 'DՑ;vҌ&qpϐ6;H0#v7FƆ< #db*ލ"KR`>ײ / !oH%ߓ潘hܘ?~tk8"e)$IݸXx? 
%!Çݻ5/2oH^h1N7w:A`|ԅ8"A#CPSNֱzb2"^3 6Rم BK꼈90U89oz嬋Gؖ3vȩfvD2gYo<42ŕG3(#@z^r5,:ݚs\滚OC>01gOՂ{ЄyVE:_]Sl^!ΉZ2,Z2G/UH`X1]$7~l̃4#yѫKF޸㊐r?Y 8.Y&2FLM2v $!c.!DP^XN9*C)o:Ұ+ r*,a7??<VC.\wdLBڑ'9QXhVViݝ%L]B(I3pPd('UXb'h\ڼW?TJ#=d_'1y_Z*}?IpkV~$6=B8.IȄ֎XIg&l88Bc' vKc{plb@=B(:CI(eVڰ~I{㿱s/N՗5R9br:M 2GvHo- C wIZMXɋ5=rS2#$;@;;U#Nr7-.}U)nM@d҄.2U'2XML7)ؤ:U M^4lj26r~dL62#L+_s% DL8CTt\vH*2U\9Q5Å A]II75A;wk[ag>-j'l J@䤉 8o`!Ԡ`niQ/F%2 iˑ؉}-Hԣ-^LR&Gq`Xeۿr P@zS]Yx ,lEn=nT$x #4yc-V1CK[O%oZdowZOBV6;G9yD֤bD8`2"Y&=-UjdBLX L4}yqyv%"@/\~~HxN;e6'4d _@5*XQM(?yA.݅U>s.$ffH,L6ݙ* ;,|y|Kǿ#9z^KeX eQ Dh9b#dw3G޶fO.?oeRi>|gYFO!R `~!82n 捋^eKkGٙ dnNfԻ8L,|5:ͣ٪LMF_'ۧk{ƖⰟyijȤXv_z`a+ c f4BRy^%ˎ؀OrݮKL1$"Q/l[)u 5B,%A7rccWB'q~le (G?XU6nj#$% !T|ݹ Hv34P0R8z7^D' -!;K-S.F1iYhNDt/~א N~|3x֬ȹ%f-.ôiJKypxe҂^oR[/S*j4uG99dX+PBKX~Yd% $I B?CԶԤ-8ȏb[|K? U>.L`_L 7ӿSq$Xt;kuC7*6~1>ߋר{gNߎ꩟k~$5w^qV o6q]`O?c7zmS;__ ?;l0.C/ v[53^Syy:lٻ/k# l4h,χ7ۦ{S)<*Ƽ_ZH6?;+>;CJSn%O{ c<[ۤ㵩^1ݏnڶ#ѥfc? cK;z㎧u[+powne<7fӈ)>]oqv<_Fڛֱ~;ޜlێ;͕}̮m<9:eUO wہjnUfe {wcgeNި UGZFj>#=v}tYxxliyi;i=?|_Sai ^.\{[dri]qz8h18Sߜ[u { oJJ˝?#w.~vsC-^'l8ruj8,7;:sx5{+xBa;o:=~XbkFH]֚x-Ovvdk=GGۍdz 9Y<8whʣƆ2m.8'>[e]NC2D1i!n?տiv{tӝ7.4 μZs:Ö;>yjGg[d99veg(='zg0QO lo%X vٱǏ>8tNSX=A|M1yr~وỏG[ܨ}ӷ{`'.}]:V<^Mlr`)SEq<{w^ν w|KGr~vSf=x`s6{&'Wk-X]ԸWP.Wlm^sEI#sy)sNrV7(ñ?m8Sbx&}u׎œ—qժͫ@@㓳Y*n~No\3q音o^{A0B9(ֆN0-H'8'}Vq ㍫Glϟ MRc7 &2oe"A-YaIɪш !;PE~XT=c]&?ݻZ6j.MXSa |]8[3*")5*=DІo\nzNkY~AA+M-˱g#crئUsRp*`3Nsݔo۴Mǒ{8u?ÛLcz0Γ Gmo\3ρ8rfwg+K< -l&4#^)-K3Mڴ؋V"X՜N.egr%§uNOuVx~&ז'ihZUirPND|EXON2k烏.5۝irrE;q\gH jTɹ2@hۨScf0_&Y=OoM\Ƨ]xuCg c[u3cZsec QJ܃SPDA=^pj\p|q] ;4=fortWw~1iLn/0gTFp;;L_ 'z*EZe /S7NK#+V xh2KLt@y %ʐ;Zs.ݝ^2$ٽVXos6gׇd`kXUh ܖ}# NwP`b'dTeP!B"̈́Qp3զM.)qoeS.f9 CD(ab*vtALp uv9i_i3|kpgxV/\)r搳;ie (pcʻP6fLU^Y:TT1jSΗ经y8As7gkX{ۀh̘hSY1YU r$ nS3%lMl6.B1#F󥐠e3ڸ`0ݶ5Egrcs:whS{PY ʱC7ገ0@L0 gZEHq̉HPtCOKA1鞛H"l}'W2``ݧ; .u4b.սdG3B_,mJnޘcECe|ZD):cI&RN우p'T/4) ofҔqkѨ%26 }b )ӧ -M8PmHI9Ws6;6w 4I=76!bcL0`(v+lŌ̦So|gJKiw ZwqzLc>H7( ux;ZO-#m4nz[lJcQh (o]߅Q'OxH{Y# 7jx~EIuhzIg: 
M1+@BҐcOo-N'4M5*<"nULɝW:%,y^ͬTU`bրĹ;!{5TaԳToFj]7lΖP.nS;gg`3ͲQd7#M_^>6u >mS/7FƏIW@JWOORo/=(2j0[k8[8GeS"bwfˆٸ!L:cqێ7ߤ.lODjn)vzRW}oj2^}O|G⁈HhIE hN6kN*dwY|4Puvg}H3 yPɥ$wMT" LfA瓎Sb!li ȰzuƑ4ory."šo_ה)˧5Dign/Znxx~Y1z5eI2NSay;,O Ȗw|lϑD3y]ؒT29s1x>a?C pJ%'>^^Y&hkm-˰0vdO@LP %3Mvp'(߄I뚀$> erw8Jl $8W Y)#xw10gZRmQO,q|ד'<{/U`xCT6xͫb PSFUŊIMdLtd"htSK-(=1ЩB!n3.zd<AibϮHuB8U)weZ[t*4l'ly2ZMؙ8+z5,rwM#n,f׈k136vbf-Vٹߺ[k Ewݨ *H XOb45sxFTͦTMJ"lpآKZ0aĎrjCaNc~G*) ~ǜ'ZM4A^^߭ޫ}| A&-Ɣ/bE+G =σ'q1]TQW1\ !W5[=jX3C"=̝ީ훲^yؗj75J-8r`÷3XP(AʐłJĐ8dEƔi\++1t%jE0`c|MKKa9ZxbL)G;5 Kw ?~g2y g\ l M5.Mh!]e~}>S5z]V}q{g)a Fܫ 2Qۮd7[Y5uL`6@ϫo#+`&jL^Q1 j^__FܫpzVKRiauYѪ0LN 3 E^s fXzUޔ w.LO-m9ׁ?ԟ~OZjvX}wG}M z[h 5_h6wib7{VC~p`g_6,Q A-kIB$p(`Z`vkY.׌s L҂ƅ؍XR PSXi ߀S/nЭ.9y ;givuL|"$-h !4@1"0a5)U3SM'fj+>] [JEE+Nl{j#ËcG*XA}çۆ5f|T wb OMD2Zl 㱷PLTfփ9'Un(/CH?]wP[vZ&# uoK$M/^: ڕIFr4f5!h L؄TKf@fTB-kFVt2,~.Y^wOM_おWem/}:3Xi9ڂlL<2!N&}ckc3Ƌ$?Q3 f(NJ faN:JNu/d~Z/? bZ|WMSS/oӞp F'E^^by~s 'Y샭wfS<>n=HqȨğv%΂yِ7bھ"A&>Ax^Z$_~ل#>禕 _*!k^ĵ zfG@1fdhiԵܴܐTN4nssIH> ZҌ~j>}Yp 1 3dO3dԉY MVx o뛸|wcE_l_Nݝޱ"muo`=VZmU{|#P߸ |y:8"#l^s$[K9j^l1E#Y`V ! ZxODvϊSgY=3m gFmdž$xz099O, :`HH3I s4N'G%ʇ/܈56Ǝ43J>|s_V8 Ƽ{ kĘěhź g(!n&7Ʀz-x,~4qŻ>vQNl~Ex/m.1m6444{;TI&OjF_@K<7K0orI>Z&Xgw'x6)CjDZ1?ZTM"_o[=}\O}*(%$Y/SKMNPCr歴driodLR7a2(=G$r BB@cUQ4/bȠ ~cc,5g~"I bMS3NO!R{ Mug7;+U2&B8Ha8>zyWXS~/3S^"W?hbDl>E=>Q~.ܛ{tO].r!Nd"lz.0޽Ϩ!<=@6+q35|Ea&W5N~\i2` i{6i\w^DⳂZ&OV~Z;ɮ$7l_Cپ[wݶ{t-7| {"9!qfRĩ(b9a"9ϚM1>$K[" 8[Q[sDO J*zE\}"▓zl2(p%ui#("1Þ 2h/Lfy~Bz6Mg~_zˏTovPN~u~À<@Й7|xdkpE`b94smVVEi ,v _gMzn`1 p̩*DΜDngN0*9k7_}4ZWM8d@{RپA-(8:8'$x#fb 쪜 f/.ԠpqA PC'a3/-`H(PxSx>3_"y8_-F UalHASJ.oHwY9|̴2| ,N( c%a}y ?dp5`$#ny>\֥S/ԓwļSz ȘF-.jDj/>Dȁ_p1-11~/'(TP[Dpy]N) 8fskih@jעpH H뾷}lfǤ=OԨxTAO4VFL  kSq:|Ql0Rc3nLM3(ptu<0 f0i:L'.2 $QAVG F` в, *'} y&gFF%^{͑WGc7E?NO16`godTxI(idIv 4¬ WsJ$f; 2)e!J(~'`%)" lDJDH"wH,{gx&b#"xވ+H A !@E[a﬉VsՐެǕMwA~'d Duo 2GJ"JHт&wkƁ!yY ~ԒiE}/ L|yg1JQ&FD*9j*D6 -1(a"I$1h2GwtczƜaཕRUW=k}}/|\TIE? 
4 } Jl 1 JecZlV*[FJD(Q)Qʕ+6mjQ3=J9)f+CNlCaZ0LD*fIHE,DJ+6FM%ZmЌ YQ-BIdVBԎF,9L"ҕ #5++4fU*ZEY"[YiELM*IRdV,: dF ꦥ(4X$$PʫJDRr$SBDa+2¥4DIjӨp#ElIjdh"!ARbB.L,K-#5T5!SLPrS iDBrUrRR#-V1$bXr#B22YtIVaFPSh\-:(BYdY,JPEGhjV%faΘm$P`"*Q¢gH,噤&]L"$"ͧD`.&U֘UJFe&R*J2kL"KHڨf5N:G2DLQNa(%$tRD")FEń4*BM Spt8s3)3DTja 0Բ-*(()UaR 5 MK)+T%"`j uAKT33U*2,5dU[B▥FUrZZf* D«$A ff"RPK)DBs-*Vlȑ:RPiNےQ t)¤B$4 3 Sb-i] ,+.IIfFs*e%Zj L∅5!a*ԔHW-Ś"EmZmSNEeI*QhedI(1L5 JNhWDTr# KRˡ3+sjF,$UYt I-HΩrAE uf#:XTSPӑbTԭ,)B(Ni1#HDKZFj36DFau3K4ӊE*$*RB$(ƥR*j\HT2144DU̩3"悚5DT*M50UVT"BPjQņ$Kd,)2JYR D)EZ%2ᑄ&jeʹUʂ""J$5UJ.GHQ5ehr+Raef2(SZ\T*E2LTJ* D(ʣ$hVdjmE""&!EZDJ&BBYHp[g hFaTRBrZdf\̉SDbY&6bVU$JeQidp+RM $LI.Rĥ*H*("j*IR #LH)*dt",U-Ҕ՝K#E#(-!$8"բ.)[Q%a*4 t]K $CJD6V",(M A03 *eFmeHh(GEN0@Ъ)Ј$4+BԌK-j,̫YZ`[6b9ĺ*tR$I*4B(ҫLL5"-DA AH&YdUIH9K$C)"Ą*J2ULZ*ՙZaiaRs4#&aQG4:ik$8arјrD)52-DD +16TS%4A* )T&U*%2e"f$t+P9+Jʫ4ÒFa֘*4IVċQUfaZF!D˘(ҤB1ZaBE*ZTMsCZͪ' $eR!iajIl0-Ns+@Hu S --0.\JS9 NIhL( iDZfDlC4a9YiWJJPVRV&Fi+U"4ŒhHfR515t4J:[P0mfeLK1R6HҒkE 3FH(YfBb$I΂,"gU0ThR( MiDSC$*iI RD\BEЍõRXU\PU 5(%:DNI&I,$"RRMkJM )T .LS-ea"jHeQIXQNJҢZp*T9hjD"$APCUPر`U$6BUDNpV@̶Ȩ+jR!tT[bZBF$qwb9"fӴiLX:DB qR:\KKJңKfaF$ Md,(BZ*"u$EbZS*Z]"NZ(fQY4YJ\i!*"5,(N"fBD%AB" Tha![KD jHF!eS$(lꐢЄ9+54"ԉJS9HNR I)$eXD!"]N I%V43$5jI j0)DDa jZ 34:dEBƪZ)Xe*6mVFY0S$RU#,5E3j`&ʤIY `9F+Xh]$k+ҵ8`[XHQpJiJHi""֖QiH Y#D69ejhXfZ"I f(D-em&"U%$HQKE&eC$%DDD5d3eVIk -A3LUfS (IZXG5 4Q RNm$ * 64PЎJe%&H$!jZͭ#J `\vP3Zh,$.rj:*!)#AvD˻s1 9IHm0vvT0("k$lQq_hCfS/>.@QAw od*"##;(EԀo~.._ݟL&$wl or,9IMZe(NՎJ銔q$,#_ sr8S`U¼O+ jkxt7v`h}U>Du>y|l!`H`}Zez2*jJ" Dʈ7d׾DQ~D '`?򔿑i@4G91z?5?M5OR~ҏ\wvȈ1<lǝPRlUU[n6jU{v5aTݯVmZqnẗ6JcƁA41D MU c\6EMc鲠t走1ښ5V2”tfoLmunbf:-Iq4 H ̭srMsP`5jo:w?~_a N?S G/~DCxSi~T0!o軐)[U۵ 71aNߩ"_" ~0ʏ9:W1\﬷l.q{P#$Ȝ= ;T\?rl8*k|U$bEyw|e}Hx"! X|,ZIZ>\ ?wzD<|bZXO, JY\ T"x#%"ASo[gqxP& ;P5U~S6QMw]A -qHzϐN9%- L4C! 3$01[ U‚2Lhap,#e%rRqˇ [.nʜ4W-A]4h ,#h"抹r#ER`d+r湤$ۻH0\wus 8 ;*!$B\"aT kIsFGnd1sr\hƝI .+7. 
AZXE9s͌$ەq &J9ҋ]`KH((Kp7(6h(ŰZ@R)8u@\Ad.AE\6rܢNtrfPhi@IFdhĈS(B'e\N'Lp4)E-AE2¸ Qmr\rKki5+1ڒآMw]\N諕d2XX4ksrܹPU^;Wū|VƙYbUZlj M;`)b(P*P|ã'tO9ȠO([L2%I^H sFx7ZC{wj""}Ϸ~I>8z/bWNuȋ4L!IAw!`W,Mh^DxC*qې55{tNPRTR%$i$IFeeRb %l Hh$"ʒȥ$&,U#+ ,ȋ(FJd$HI,b!0PĔHPF тBH4Q0E0&Ha1bT2$ :ڮXjɶF(QF))"T"Ō&J#%14DRAlͣ#6E #RH$A(K#6)%)B) H -+B~dHA)fJPQ (0 0B"#$!B1B%(€`23 d!' t@Dcʆo4!Q;tN)SJ."<J oHgaN %<5el+2B* (@yi|GH$]=`9}Y?#4e-BW'ъ3VPi~ru |4lzyr EriH%*]'<3$ozחN."dOF"wd.i0cOw=yû]wQsbX@yrSR@Weք P"( !W RH ҪO*R Ai@𯖗ɐ҆-QOv`ـZPFN`! ysF H&Ti@i!*d&a;&!v$y-S"Eج/1J>ȯ2R&E~Ŕ-CddEY ^;]܊/Đ^IcaX0'2@܈[cD֓P: < EOŁP)ԁ}TT *ȝhҏf55Hj.CfuKpKQN(S`UZ߀f|o0&{NU w2.v_LsrD3Y~: '[>,VGDQم@ԗDjuKϛSK;_q>TcS(׽)ooK^-i+t"J5 _qjC2LIyCkO}_w}~_}_@{?I}|c^# R!jMxvE3,16C-p@_S/*._}M}%=!;l*"~Wʙ>cL^;ŃB$/i7e'~D_"{U<^> B]\Fdj5(wf AQ9ޗpb8G;=5Gy/(e{٠= (.#>{Lya1}'OSS"u:$#tGP!Zp ϳ蜷h} WݐtJ'+9^ǢOښuפ{瑜ut~{zv oAhR+y=o'2(`JΫF=\jc ׇ*)OW{" )ȂހEMYHVtO×Ήq$dO9/"~[T }ɔ AsEMoF|@6`|9SԀ V@|q=?ogfzwl?4Ʒ M~GS\ kd@#HCƀ|yyXTփZe:H<L|C.gր(@q+ٸ2<Ü P=e(P" "PmՍhXmhT(UEhJiDF(Z@ih5DmccljѵHS(qЩTM JIAjlkbգ ((HҩHbDB* -+@R1XEڣ[\ʍX-j,Zj0%-R@JbģETQƱh֢Z0J- Ĕ+*4 -j[EUjč4QJ ]EEkmhL@ZPJZT#j5UUFۚVZ5cV%* B PR&%TĮ%mhXڍb+QEZUhnUQhjƱ[EZѬVDUتm5QkhڣV6`ڍUF(ܑ1)BT& k"k^-p((V)hJ(iTL@S hiJ+cUTH% !H шDĢR RJ RP!WP-Z-Eb֍mhhs\1Z؊֍-X֌UkmEثbUTJ#BR Rb1#Kzη{rr?ṳ>OK/bowl=Ȋ"Х ET~h}!ݮbԚ {{xwr ij*ԊN\ivԪxLPv6qc11&l3L1߬C1%:bHܷ6zxGm 5jT̡PkvW‚MXz 0K7Pb)\kdzNfYbxB{y_ρ>UM`_?2QOt|ς{0J!oMI|)Go/D|W %/Er'.Qt9?1&?QRP=<,Nv}VF[lH$KDU%D$i/nM +0؊EtیoIySPk`8՗@{ˢ8$syX,P_,AVL2íAd2HdhdBA 1A ȔHI1d#"f Rl%$a2IDFiJ `2PS"ȒTj2$S(ҔX#LJb M%BэDiK4c$(ƚd#!ԈQcLkQ LhbF$(h(1TJhъ$6Ĕ[Ŋ (IXHĒY-I61b(ZKFCS(ƍFF Kc#&FZ2ZMFŲZ6mkDDXm-IdɶXƋEVKE4bؒf!Y4Z((کK (Pe6J14mIXTmcTRQi4REZ6V4Qlkb1"F,j+QZ6(-E6bѨL-TU5nͮm7[c6*M#VXL(A0M[@4mEHۄl?#!>JD@P\!r7L)__( Cَ413ڀʰPF0+(?I'B AߐW^;J.{:Uc>V(Tg㜫 ܐ}_m#"d{{mח^ӠM B> (lrw(sސaC$d)0 j0;a$R.p%;iq]M P6Ma(Dn٨ "i0cԝ/Qe|4Ro/FbnzTiQX2DTkO(56̤h19XSZ(R_gek޹^̓sqUYIWD萏ΞLR(N)8FIir*F,ڙַWؾDpΝ4W ־5hW&ëf״x*jCR/¯GI/ &j( a Pwxf+"h8)(GT  B MgO7\zɽT`PQNL.LM.ز2gj-䌷r1?RYz^oihdsлU[6e}78Γ25jKzAQa}gy] B+qDJPRB+6V.mpX(j 
ݴ覵˻WkSBzmYע۹sҦ%ױծYEw d"yl7åFͪN*5Zە^Z,kjan8;wMcJɒg)ָ$ݵrBAl%7fD\ؓV `cyޓnhi&$ylUuɕi<x*bF U#<ch;7.o^cpA{\g- .Ș!PX0iȺ(Sx;׻A;$i> g_mh}/}{M|2݅fŃHX!;Q YUӼ!W+@TDO!0Rt ;G+@-wF;vG_KE>EGd;'ٔr%dk9xsBq̙L"(F2Fve0(|X{O΀\ܞw`O\f8EDDP1J FHIH S2B4LBd#$D&2 0MiƣY#151FɱE$ZFFIQE ddLBlʊaldƍ&JB(cTFbMMciƐJ$ƍFF 5QY*J -EDkhQi"$A,P RQhbI)PaF60`Č(dĖɍJX31XL6"FcXLTAh,Xk)E-6hʱѬ,[ZأbMZfThɤ6aIfRTYjQFѱAi hcQh%b26MFĥbF,PkFj5QbM&L1T[ljŨlbQB(6KkI3"FZ5#&4ĉSDBAE@aP{ЪpjJ+IQRpooŧnBqD+SQ]  T]ESo 0)BxN8;&(ߐF)!2>>섮]iGW޼r| 8RwaQUއ/DJ'6AyUO&&EӴ:"*ܬB)^+[֯qF䡫<~suPv9:<z skghE܉S:"sGnTvvvEPa@\*a HAx6%{h~teķߡ@C^O~FT.fS(,|/oXHPH ^{pb{qv($ۃ DHBs˹G0*WfWHp5|$Xऔ{wWW(ѤQp: rE1m[ֽqq@:6d(qkBs2HP@ޠ@ S="=P QM3SAdA)sdb-T}P߻"I~pnTQ 'K đ^l&enXe`feUG/|*HCGN!koQlF^Mp+=x/H =dplyr/man/0000755000176200001440000000000014525503021012154 5ustar liggesusersdplyr/man/cross_join.Rd0000644000176200001440000000505014366556340014632 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join-cross.R \name{cross_join} \alias{cross_join} \title{Cross join} \usage{ cross_join(x, y, ..., copy = FALSE, suffix = c(".x", ".y")) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{Other parameters passed onto methods.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} } \value{ An object of the same type as \code{x} (including the same groups). The output has the following properties: \itemize{ \item There are \code{nrow(x) * nrow(y)} rows returned. 
\item Output columns include all columns from both \code{x} and \code{y}. Column name collisions are resolved using \code{suffix}. \item The order of the rows and columns of \code{x} is preserved as much as possible. } } \description{ Cross joins match each row in \code{x} to every row in \code{y}, resulting in a data frame with \code{nrow(x) * nrow(y)} rows. Since cross joins result in all possible matches between \code{x} and \code{y}, they technically serve as the basis for all \link[=mutate-joins]{mutating joins}, which can generally be thought of as cross joins followed by a filter. In practice, a more specialized procedure is used for better performance. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("cross_join")}. } \examples{ # Cross joins match each row in `x` to every row in `y`. # Data within the columns is not used in the matching process. cross_join(band_instruments, band_members) # Control the suffix added to variables duplicated in # `x` and `y` with `suffix`. cross_join(band_instruments, band_members, suffix = c("", "_y")) } \seealso{ Other joins: \code{\link{filter-joins}}, \code{\link{mutate-joins}}, \code{\link{nest_join}()} } \concept{joins} dplyr/man/glimpse.Rd0000644000176200001440000000176314366556340014131 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reexport-pillar.R \name{glimpse} \alias{glimpse} \title{Get a glimpse of your data} \value{ x original x is (invisibly) returned, allowing \code{glimpse()} to be used within a data pipeline. } \description{ \code{glimpse()} is like a transposed version of \code{print()}: columns run down the page, and data runs across. 
This makes it possible to see every column in a data frame. It's a little like \code{\link[=str]{str()}} applied to a data frame but it tries to show you as much data as possible. (And it always shows the underlying data, even when applied to a remote data source.) \code{glimpse()} is provided by the pillar package, and re-exported by dplyr. See \code{\link[pillar:glimpse]{pillar::glimpse()}} for more details. } \examples{ glimpse(mtcars) # Note that original x is (invisibly) returned, allowing `glimpse()` to be # used within a pipeline. mtcars \%>\% glimpse() \%>\% select(1:3) glimpse(starwars) } dplyr/man/tbl_vars.Rd0000644000176200001440000000116614366556340014302 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbl.R \name{tbl_vars} \alias{tbl_vars} \alias{tbl_nongroup_vars} \title{List variables provided by a tbl.} \usage{ tbl_vars(x) tbl_nongroup_vars(x) } \arguments{ \item{x}{A tbl object} } \description{ \code{tbl_vars()} returns all variables while \code{tbl_nongroup_vars()} returns only non-grouping variables. The \code{groups} attribute of the object returned by \code{tbl_vars()} is a character vector of the grouping columns. } \seealso{ \code{\link[=group_vars]{group_vars()}} for a function that returns grouping variables. 
} \keyword{internal} dplyr/man/count.Rd0000644000176200001440000000771414420040360013600 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/count-tally.R \name{count} \alias{count} \alias{count.data.frame} \alias{tally} \alias{add_count} \alias{add_tally} \title{Count the observations in each group} \usage{ count(x, ..., wt = NULL, sort = FALSE, name = NULL) \method{count}{data.frame}( x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x) ) tally(x, wt = NULL, sort = FALSE, name = NULL) add_count(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) add_tally(x, wt = NULL, sort = FALSE, name = NULL) } \arguments{ \item{x}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr).} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group by.} \item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights. Can be \code{NULL} or a variable: \itemize{ \item If \code{NULL} (the default), counts the number of rows in each group. \item If a variable, computes \code{sum(wt)} for each group. }} \item{sort}{If \code{TRUE}, will show the largest groups at the top.} \item{name}{The name of the new column in the output. If omitted, it will default to \code{n}. If there's already a column called \code{n}, it will use \code{nn}. If there's a column called \code{n} and \code{nn}, it'll use \code{nnn}, and so on, adding \code{n}s until it gets a new name.} \item{.drop}{Handling of factor levels that don't appear in the data, passed on to \code{\link[=group_by]{group_by()}}. For \code{count()}: if \code{FALSE} will include counts for empty groups (i.e. for levels of factors that don't exist in the data). 
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} For \code{add_count()}: deprecated since it can't actually affect the output.} } \value{ An object of the same type as \code{.data}. \code{count()} and \code{add_count()} group transiently, so the output has the same groups as the input. } \description{ \code{count()} lets you quickly count the unique values of one or more variables: \code{df \%>\% count(a, b)} is roughly equivalent to \code{df \%>\% group_by(a, b) \%>\% summarise(n = n())}. \code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent to \code{df \%>\% summarise(n = n())}. Supply \code{wt} to perform weighted counts, switching the summary from \code{n = n()} to \code{n = sum(wt)}. \code{add_count()} and \code{add_tally()} are equivalents to \code{count()} and \code{tally()} but use \code{mutate()} instead of \code{summarise()} so that they add a new column with group-wise counts. } \examples{ # count() is a convenient way to get a sense of the distribution of # values in a dataset starwars \%>\% count(species) starwars \%>\% count(species, sort = TRUE) starwars \%>\% count(sex, gender, sort = TRUE) starwars \%>\% count(birth_decade = round(birth_year, -1)) # use the `wt` argument to perform a weighted count. 
This is useful # when the data has already been aggregated once df <- tribble( ~name, ~gender, ~runs, "Max", "male", 10, "Sandra", "female", 1, "Susan", "female", 4 ) # counts rows: df \%>\% count(gender) # counts runs: df \%>\% count(gender, wt = runs) # When factors are involved, `.drop = FALSE` can be used to retain factor # levels that don't appear in the data df2 <- tibble( id = 1:5, type = factor(c("a", "c", "a", NA, "a"), levels = c("a", "b", "c")) ) df2 \%>\% count(type) df2 \%>\% count(type, .drop = FALSE) # Or, using `group_by()`: df2 \%>\% group_by(type, .drop = FALSE) \%>\% count() # tally() is a lower-level function that assumes you've done the grouping starwars \%>\% tally() starwars \%>\% group_by(species) \%>\% tally() # both count() and tally() have add_ variants that work like # mutate() instead of summarise df \%>\% add_count(gender, wt = runs) df \%>\% add_tally(wt = runs) } dplyr/man/summarise_each.Rd0000644000176200001440000000157214272222416015442 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-lazyeval.R \name{summarise_each} \alias{summarise_each} \alias{summarise_each_} \alias{mutate_each} \alias{mutate_each_} \alias{summarize_each} \alias{summarize_each_} \title{Summarise and mutate multiple columns.} \usage{ summarise_each(tbl, funs, ...) summarise_each_(tbl, funs, vars) mutate_each(tbl, funs, ...) mutate_each_(tbl, funs, vars) summarize_each(tbl, funs, ...) summarize_each_(tbl, funs, vars) } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{mutate_each()} and \code{summarise_each()} are deprecated in favour of the new \code{\link[=across]{across()}} function that works within \code{summarise()} and \code{mutate()}. 
} \keyword{internal} dplyr/man/ntile.Rd0000644000176200001440000000260414366556340013577 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rank.R \name{ntile} \alias{ntile} \title{Bucket a numeric vector into \code{n} groups} \usage{ ntile(x = row_number(), n) } \arguments{ \item{x}{A vector to rank By default, the smallest values will get the smallest ranks. Use \code{\link[=desc]{desc()}} to reverse the direction so the largest values get the smallest ranks. Missing values will be given rank \code{NA}. Use \code{coalesce(x, Inf)} or \code{coalesce(x, -Inf)} if you want to treat them as the largest or smallest values respectively. To rank by multiple columns at once, supply a data frame.} \item{n}{Number of groups to bucket into} } \description{ \code{ntile()} is a sort of very rough rank, which breaks the input vector into \code{n} buckets. If \code{length(x)} is not an integer multiple of \code{n}, the size of the buckets will differ by up to one, with larger buckets coming first. Unlike other ranking functions, \code{ntile()} ignores ties: it will create evenly sized buckets even if the same value of \code{x} ends up in different buckets. } \examples{ x <- c(5, 1, 3, 2, 2, NA) ntile(x, 2) ntile(x, 4) # If the bucket sizes are uneven, the larger buckets come first ntile(1:8, 3) # Ties are ignored ntile(rep(1, 8), 3) } \seealso{ Other ranking functions: \code{\link{percent_rank}()}, \code{\link{row_number}()} } \concept{ranking functions} dplyr/man/do.Rd0000644000176200001440000000374414366556340013074 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-do.R \name{do} \alias{do} \title{Do anything} \usage{ do(.data, ...) } \arguments{ \item{.data}{a tbl} \item{...}{Expressions to apply to each group. If named, results will be stored in a new column. If unnamed, must return a data frame. You can use \code{.} to refer to the current group. 
You can not mix named and unnamed arguments.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{do()} is superseded as of dplyr 1.0.0, because its syntax never really felt like it belonged with the rest of dplyr. It's replaced by a combination of \code{\link[=reframe]{reframe()}} (which can produce multiple rows and multiple columns), \code{\link[=nest_by]{nest_by()}} (which creates a \link{rowwise} tibble of nested data), and \code{\link[=pick]{pick()}} (which allows you to access the data for the "current" group). } \examples{ # do() with unnamed arguments becomes reframe() or summarise() # . becomes pick() by_cyl <- mtcars \%>\% group_by(cyl) by_cyl \%>\% do(head(., 2)) # -> by_cyl \%>\% reframe(head(pick(everything()), 2)) by_cyl \%>\% slice_head(n = 2) # Can refer to variables directly by_cyl \%>\% do(mean = mean(.$vs)) # -> by_cyl \%>\% summarise(mean = mean(vs)) # do() with named arguments becomes nest_by() + mutate() & list() models <- by_cyl \%>\% do(mod = lm(mpg ~ disp, data = .)) # -> models <- mtcars \%>\% nest_by(cyl) \%>\% mutate(mod = list(lm(mpg ~ disp, data = data))) models \%>\% summarise(rsq = summary(mod)$r.squared) # use broom to turn models into data models \%>\% do(data.frame( var = names(coef(.$mod)), coef(summary(.$mod))) ) \dontshow{if (requireNamespace("broom", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # -> models \%>\% reframe(broom::tidy(mod)) \dontshow{\}) # examplesIf} } \keyword{internal} dplyr/man/top_n.Rd0000644000176200001440000000360614266276767013621 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/top-n.R \name{top_n} \alias{top_n} \alias{top_frac} \title{Select top (or bottom) n rows (by value)} \usage{ top_n(x, n, wt) top_frac(x, n, wt) } \arguments{ \item{x}{A data frame.} \item{n}{Number of 
rows to return for \code{top_n()}, fraction of rows to return for \code{top_frac()}. If \code{n} is positive, selects the top rows. If negative, selects the bottom rows. If \code{x} is grouped, this is the number (or fraction) of rows per group. Will include more rows if there are ties.} \item{wt}{(Optional). The variable to use for ordering. If not specified, defaults to the last variable in the tbl.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{top_n()} has been superseded in favour of \code{\link[=slice_min]{slice_min()}}/\code{\link[=slice_max]{slice_max()}}. While it will not be deprecated in the near future, retirement means that we will only perform critical bug fixes, so we recommend moving to the newer alternatives. \code{top_n()} was superseded because the name was fundamentally confusing as it returned what you might reasonably consider to be the \emph{bottom} rows. Additionally, the \code{wt} variable had a confusing name, and strange default (the last column in the data frame). Unfortunately we could not see an easy way to fix the existing \code{top_n()} function without breaking existing code, so we created a new alternative. 
} \examples{ df <- data.frame(x = c(6, 4, 1, 10, 3, 1, 1)) df \%>\% top_n(2) # highest values df \%>\% top_n(-2) # lowest values # now use df \%>\% slice_max(x, n = 2) df \%>\% slice_min(x, n = 2) # top_frac() -> prop argument of slice_min()/slice_max() df \%>\% top_frac(.5) # -> df \%>\% slice_max(x, prop = 0.5) } \keyword{internal} dplyr/man/arrange_all.Rd0000644000176200001440000000642014366556340014733 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-arrange.R \name{arrange_all} \alias{arrange_all} \alias{arrange_at} \alias{arrange_if} \title{Arrange rows by a selection of variables} \usage{ arrange_all(.tbl, .funs = list(), ..., .by_group = FALSE, .locale = NULL) arrange_at(.tbl, .vars, .funs = list(), ..., .by_group = FALSE, .locale = NULL) arrange_if( .tbl, .predicate, .funs = list(), ..., .by_group = FALSE, .locale = NULL ) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to grouped data frames only.} \item{.locale}{The locale to sort character vectors in. \itemize{ \item If \code{NULL}, the default, uses the \code{"C"} locale unless the \code{dplyr.legacy_locale} global option escape hatch is active. See the \link{dplyr-locale} help page for more details. \item If a single string from \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} is supplied, then this will be used as the locale to sort with. For example, \code{"en"} will sort with the American English locale. This requires the stringi package. \item If \code{"C"} is supplied, then character vectors will always be sorted in the C locale. 
This does not require stringi and is often much faster than supplying a locale identifier. } The C locale is not the same as English locales, such as \code{"en"}, particularly when it comes to data containing a mix of upper and lower case letters. This is explained in more detail on the \link[=dplyr-locale]{locale} help page under the \verb{Default locale} section.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=pick]{pick()}} or \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These \link{scoped} variants of \code{\link[=arrange]{arrange()}} sort a data frame by a selection of variables. Like \code{\link[=arrange]{arrange()}}, you can modify the variables before ordering with the \code{.funs} argument. } \section{Grouping variables}{ The grouping variables that are part of the selection participate in the sorting of the data frame. 
} \examples{ df <- as_tibble(mtcars) arrange_all(df) # -> arrange(df, pick(everything())) arrange_all(df, desc) # -> arrange(df, across(everything(), desc)) } \keyword{internal} dplyr/man/group_trim.Rd0000644000176200001440000000215314366556340014652 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-trim.R \name{group_trim} \alias{group_trim} \title{Trim grouping structure} \usage{ group_trim(.tbl, .drop = group_by_drop_default(.tbl)) } \arguments{ \item{.tbl}{A \link[=grouped_df]{grouped data frame}} \item{.drop}{See \code{\link[=group_by]{group_by()}}} } \value{ A \link[=grouped_df]{grouped data frame} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Drop unused levels of all factors that are used as grouping variables, then recalculates the grouping structure. \code{group_trim()} is particularly useful after a \code{\link[=filter]{filter()}} that is intended to select a subset of groups. } \examples{ iris \%>\% group_by(Species) \%>\% filter(Species == "setosa", .preserve = TRUE) \%>\% group_trim() } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_map}()}, \code{\link{group_nest}()}, \code{\link{group_split}()} } \concept{grouping functions} dplyr/man/last_dplyr_warnings.Rd0000644000176200001440000000122514366556340016547 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/conditions.R \name{last_dplyr_warnings} \alias{last_dplyr_warnings} \title{Show warnings from the last command} \usage{ last_dplyr_warnings(n = 5) } \arguments{ \item{n}{Passed to \code{\link[=head]{head()}} so that only the first \code{n} warnings are displayed.} } \description{ Warnings that occur inside a dplyr verb like \code{mutate()} are caught and stashed away instead of being emitted to the console. 
This prevents rowwise and grouped data frames from flooding the console with warnings. To see the original warnings, use \code{last_dplyr_warnings()}. } \keyword{internal} dplyr/man/summarise.Rd0000644000176200001440000001455414406402754014472 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarise.R \name{summarise} \alias{summarise} \alias{summarize} \title{Summarise each group down to one row} \usage{ summarise(.data, ..., .by = NULL, .groups = NULL) summarize(.data, ..., .by = NULL, .groups = NULL) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of summary functions. The name will be the name of the variable in the result. The value can be: \itemize{ \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. \item A data frame, to add multiple columns from a single expression. } \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was deprecated as of 1.1.0. Please use \code{\link[=reframe]{reframe()}} for this instead.} \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to group by for just this operation, functioning as an alternative to \code{\link[=group_by]{group_by()}}. 
For details and examples, see \link[=dplyr_by]{?dplyr_by}.} \item{.groups}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Grouping structure of the result. \itemize{ \item "drop_last": dropping the last level of grouping. This was the only supported option before version 1.0.0. \item "drop": All levels of grouping are dropped. \item "keep": Same grouping structure as \code{.data}. \item "rowwise": Each row is its own group. } When \code{.groups} is not specified, it is chosen based on the number of rows of the results: \itemize{ \item If all the results have 1 row, you get "drop_last". \item If the number of rows varies, you get "keep" (note that returning a variable number of rows was deprecated in favor of \code{\link[=reframe]{reframe()}}, which also unconditionally drops all levels of grouping). } In addition, a message informs you of that choice, unless the result is ungrouped, the option "dplyr.summarise.inform" is set to \code{FALSE}, or when \code{summarise()} is called from a function in a package.} } \value{ An object \emph{usually} of the same type as \code{.data}. \itemize{ \item The rows come from the underlying \code{\link[=group_keys]{group_keys()}}. \item The columns are a combination of the grouping keys and the summary expressions that you provide. \item The grouping structure is controlled by the \verb{.groups=} argument, the output may be another \link{grouped_df}, a \link{tibble} or a \link{rowwise} data frame. \item Data frame attributes are \strong{not} preserved, because \code{summarise()} fundamentally creates a new data frame. } } \description{ \code{summarise()} creates a new data frame. It returns one row for each combination of grouping variables; if there are no grouping variables, the output will have a single row summarising all observations in the input. 
It will contain one column for each grouping variable and one column for each of the summary statistics that you have specified. \code{summarise()} and \code{summarize()} are synonyms. } \section{Useful functions}{ \itemize{ \item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}} \item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}} \item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \item Position: \code{\link[=first]{first()}}, \code{\link[=last]{last()}}, \code{\link[=nth]{nth()}}, \item Count: \code{\link[=n]{n()}}, \code{\link[=n_distinct]{n_distinct()}} \item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}} } } \section{Backend variations}{ The data frame backend supports creating a variable and using it in the same summary. This means that previously created summary variables can be further transformed or combined within the summary, as in \code{\link[=mutate]{mutate()}}. However, it also means that summary variables with the same names as previous variables overwrite them, making those variables unavailable to later summary variables. This behaviour may not be supported in other backends. To avoid unexpected results, consider using new names for your summary variables, especially when creating multiple summaries. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}. 
} \examples{ # A summary applied to ungrouped tbl returns a single row mtcars \%>\% summarise(mean = mean(disp), n = n()) # Usually, you'll want to group first mtcars \%>\% group_by(cyl) \%>\% summarise(mean = mean(disp), n = n()) # Each summary call removes one grouping level (since that group # is now just a single row) mtcars \%>\% group_by(cyl, vs) \%>\% summarise(cyl_n = n()) \%>\% group_vars() # BEWARE: reusing variables may lead to unexpected results mtcars \%>\% group_by(cyl) \%>\% summarise(disp = mean(disp), sd = sd(disp)) # Refer to column names stored as strings with the `.data` pronoun: var <- "mass" summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE)) # Learn more in ?rlang::args_data_masking # In dplyr 1.1.0, returning multiple rows per group was deprecated in favor # of `reframe()`, which never messages and always returns an ungrouped # result: mtcars \%>\% group_by(cyl) \%>\% summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) # -> mtcars \%>\% group_by(cyl) \%>\% reframe(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{reframe}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()} } \concept{single table verbs} dplyr/man/bind_cols.Rd0000644000176200001440000000243314525503021014401 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind-cols.R \name{bind_cols} \alias{bind_cols} \title{Bind multiple data frames by column} \usage{ bind_cols( ..., .name_repair = c("unique", "universal", "check_unique", "minimal") ) } \arguments{ \item{...}{Data frames to combine. Each argument can either be a data frame, a list that could be a data frame, or a list of data frames. 
Inputs are \link[vctrs:theory-faq-recycling]{recycled} to the same length, then matched by position.} \item{.name_repair}{One of \code{"unique"}, \code{"universal"}, or \code{"check_unique"}. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for the meaning of these options.} } \value{ A data frame the same type as the first element of \code{...}. } \description{ Bind any number of data frames by column, making a wider result. This is similar to \code{do.call(cbind, dfs)}. Where possible prefer using a \link[=left_join]{join} to combine multiple data frames. \code{bind_cols()} binds the rows in order in which they appear so it is easy to create meaningless results without realising it. } \examples{ df1 <- tibble(x = 1:3) df2 <- tibble(y = 3:1) bind_cols(df1, df2) # Row sizes must be compatible when column-binding try(bind_cols(tibble(x = 1:3), tibble(y = 1:2))) } dplyr/man/dplyr-locale.Rd0000644000176200001440000000657314366556340015064 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/locale.R \name{dplyr-locale} \alias{dplyr-locale} \title{Locale used by \code{arrange()}} \description{ This page documents details about the locale used by \code{\link[=arrange]{arrange()}} when ordering character vectors. \subsection{Default locale}{ The default locale used by \code{arrange()} is the C locale. This is used when \code{.locale = NULL} unless the \code{dplyr.legacy_locale} global option is set to \code{TRUE}. You can also force the C locale to be used unconditionally with \code{.locale = "C"}. The C locale is not exactly the same as English locales, such as \code{"en"}. The main difference is that the C locale groups the English alphabet by \emph{case}, while most English locales group the alphabet by \emph{letter}. 
For example, \code{c("a", "b", "C", "B", "c")} will sort as \code{c("B", "C", "a", "b", "c")} in the C locale, with all uppercase letters coming before lowercase letters, but will sort as \code{c("a", "b", "B", "c", "C")} in an English locale. This often makes little practical difference during data analysis, because both return identical results when case is consistent between observations. } \subsection{Reproducibility}{ The C locale has the benefit of being completely reproducible across all supported R versions and operating systems with no extra effort. If you set \code{.locale} to an option from \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}}, then stringi must be installed by anyone who wants to run your code. If you utilize this in a package, then stringi should be placed in \code{Imports}. } \subsection{Legacy behavior}{ Prior to dplyr 1.1.0, character columns were ordered in the system locale. If you need to temporarily revert to this behavior, you can set the global option \code{dplyr.legacy_locale} to \code{TRUE}, but this should be used sparingly and you should expect this option to be removed in a future version of dplyr. It is better to update existing code to explicitly use \code{.locale} instead. Note that setting \code{dplyr.legacy_locale} will also force calls to \code{\link[=group_by]{group_by()}} to use the system locale when internally ordering the groups. Setting \code{.locale} will override any usage of \code{dplyr.legacy_locale}. } } \examples{ \dontshow{if (dplyr:::has_minimum_stringi()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} df <- tibble(x = c("a", "b", "C", "B", "c")) df # Default locale is C, which groups the English alphabet by case, placing # uppercase letters before lowercase letters. arrange(df, x) # The American English locale groups the alphabet by letter. # Explicitly override `.locale` with `"en"` for this ordering. 
arrange(df, x, .locale = "en") # This Danish letter is expected to sort after `z` df <- tibble(x = c("o", "p", "\u00F8", "z")) df # The American English locale sorts it right after `o` arrange(df, x, .locale = "en") # Using `"da"` for Danish ordering gives the expected result arrange(df, x, .locale = "da") # If you need the legacy behavior of `arrange()`, which respected the # system locale, then you can set the global option `dplyr.legacy_locale`, # but expect this to be removed in the future. We recommend that you use # the `.locale` argument instead. rlang::with_options(dplyr.legacy_locale = TRUE, { arrange(df, x) }) \dontshow{\}) # examplesIf} } \keyword{internal} dplyr/man/group_nest.Rd0000644000176200001440000000445214366556340014654 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-nest.R \name{group_nest} \alias{group_nest} \title{Nest a tibble using a grouping specification} \usage{ group_nest(.tbl, ..., .key = "data", keep = FALSE) } \arguments{ \item{.tbl}{A tbl} \item{...}{Grouping specification, forwarded to \code{\link[=group_by]{group_by()}}} \item{.key}{the name of the list column} \item{keep}{Should the grouping columns be kept in the list column.} } \value{ A tbl with one row per unique combination of the grouping variables. The first columns are the grouping variables, followed by a list column of tibbles with matching rows of the remaining columns. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Nest a tibble using a grouping specification } \section{Lifecycle}{ \code{group_nest()} is not stable because \code{\link[tidyr:nest]{tidyr::nest(.by =)}} provides very similar behavior. It may be deprecated in the future. 
} \section{Grouped data frames}{ The primary use case for \code{\link[=group_nest]{group_nest()}} is with already grouped data frames, typically a result of \code{\link[=group_by]{group_by()}}. In this case \code{\link[=group_nest]{group_nest()}} only uses the first argument, the grouped tibble, and warns when \code{...} is used. } \section{Ungrouped data frames}{ When used on ungrouped data frames, \code{\link[=group_nest]{group_nest()}} forwards the \code{...} to \code{\link[=group_by]{group_by()}} before nesting, therefore the \code{...} are subject to the data mask. } \examples{ #----- use case 1: a grouped data frame iris \%>\% group_by(Species) \%>\% group_nest() # this can be useful if the grouped data has been altered before nesting iris \%>\% group_by(Species) \%>\% filter(Sepal.Length > mean(Sepal.Length)) \%>\% group_nest() #----- use case 2: using group_nest() on a ungrouped data frame with # a grouping specification that uses the data mask starwars \%>\% group_nest(species, homeworld) } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_map}()}, \code{\link{group_split}()}, \code{\link{group_trim}()} } \concept{grouping functions} \keyword{internal} dplyr/man/starwars.Rd0000644000176200001440000000232114525503021014307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data-starwars.R \docType{data} \name{starwars} \alias{starwars} \title{Starwars characters} \format{ A tibble with 87 rows and 14 variables: \describe{ \item{name}{Name of the character} \item{height}{Height (cm)} \item{mass}{Weight (kg)} \item{hair_color,skin_color,eye_color}{Hair, skin, and eye colors} \item{birth_year}{Year born (BBY = Before Battle of Yavin)} \item{sex}{The biological sex of the character, namely male, female, hermaphroditic, or none (as in the case for Droids).} \item{gender}{The gender role or gender identity of the character as determined by their personality or the way they were programmed 
(as in the case for Droids).} \item{homeworld}{Name of homeworld} \item{species}{Name of species} \item{films}{List of films the character appeared in} \item{vehicles}{List of vehicles the character has piloted} \item{starships}{List of starships the character has piloted} } } \usage{ starwars } \description{ The original data, from SWAPI, the Star Wars API, \url{https://swapi.py4e.com/}, has been revised to reflect additional research into gender and sex determinations of characters. } \examples{ starwars } \keyword{datasets} dplyr/man/select.Rd0000644000176200001440000002166414416000530013727 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/select.R \name{select} \alias{select} \title{Keep or drop columns using their names and types} \usage{ select(.data, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> One or more unquoted expressions separated by commas. Variable names can be used as if they were positions in the data frame, so expressions like \code{x:y} can be used to select a range of variables.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item Output columns are a subset of input columns, potentially with a different order. Columns will be renamed if \code{new_name = old_name} form is used. \item Data frame attributes are preserved. \item Groups are maintained; you can't select off grouping variables. } } \description{ Select (and optionally rename) variables in a data frame, using a concise mini-language that makes it easy to refer to variables based on their name (e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the right) or type (e.g. \code{where(is.numeric)} selects all numeric columns). 
\subsection{Overview of selection features}{ Tidyverse selections implement a dialect of R where operators make it easy to select variables: \itemize{ \item \code{:} for selecting a range of consecutive variables. \item \code{!} for taking the complement of a set of variables. \item \code{&} and \code{|} for selecting the intersection or the union of two sets of variables. \item \code{c()} for combining selections. } In addition, you can use \strong{selection helpers}. Some helpers select specific columns: \itemize{ \item \code{\link[tidyselect:everything]{everything()}}: Matches all variables. \item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset. \item \code{\link[=group_cols]{group_cols()}}: Select all grouping columns. } Other helpers select variables by matching patterns in their names: \itemize{ \item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix. \item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix. \item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string. \item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression. \item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03. } Or from variables stored in a character vector: \itemize{ \item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All names must be present, otherwise an out-of-bounds error is thrown. \item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown for names that don't exist. } Or using a predicate function: \itemize{ \item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those for which the function returns \code{TRUE}. } } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. 
See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. } \section{Examples}{ Here we show the usage for the basic selection operators. See the specific help pages to learn about helpers like \code{\link[=starts_with]{starts_with()}}. The selection language can be used in functions like \code{dplyr::select()} or \code{tidyr::pivot_longer()}. Let's first attach the tidyverse: \if{html}{\out{
}}\preformatted{library(tidyverse) # For better printing iris <- as_tibble(iris) }\if{html}{\out{
}} Select variables by name: \if{html}{\out{
}}\preformatted{starwars \%>\% select(height) #> # A tibble: 87 x 1 #> height #> #> 1 172 #> 2 167 #> 3 96 #> 4 202 #> # i 83 more rows iris \%>\% pivot_longer(Sepal.Length) #> # A tibble: 150 x 6 #> Sepal.Width Petal.Length Petal.Width Species name value #> #> 1 3.5 1.4 0.2 setosa Sepal.Length 5.1 #> 2 3 1.4 0.2 setosa Sepal.Length 4.9 #> 3 3.2 1.3 0.2 setosa Sepal.Length 4.7 #> 4 3.1 1.5 0.2 setosa Sepal.Length 4.6 #> # i 146 more rows }\if{html}{\out{
}} Select multiple variables by separating them with commas. Note how the order of columns is determined by the order of inputs: \if{html}{\out{
}}\preformatted{starwars \%>\% select(homeworld, height, mass) #> # A tibble: 87 x 3 #> homeworld height mass #> #> 1 Tatooine 172 77 #> 2 Tatooine 167 75 #> 3 Naboo 96 32 #> 4 Tatooine 202 136 #> # i 83 more rows }\if{html}{\out{
}} Functions like \code{tidyr::pivot_longer()} don't take variables with dots. In this case use \code{c()} to select multiple variables: \if{html}{\out{
}}\preformatted{iris \%>\% pivot_longer(c(Sepal.Length, Petal.Length)) #> # A tibble: 300 x 5 #> Sepal.Width Petal.Width Species name value #> #> 1 3.5 0.2 setosa Sepal.Length 5.1 #> 2 3.5 0.2 setosa Petal.Length 1.4 #> 3 3 0.2 setosa Sepal.Length 4.9 #> 4 3 0.2 setosa Petal.Length 1.4 #> # i 296 more rows }\if{html}{\out{
}} \subsection{Operators:}{ The \code{:} operator selects a range of consecutive variables: \if{html}{\out{
}}\preformatted{starwars \%>\% select(name:mass) #> # A tibble: 87 x 3 #> name height mass #> #> 1 Luke Skywalker 172 77 #> 2 C-3PO 167 75 #> 3 R2-D2 96 32 #> 4 Darth Vader 202 136 #> # i 83 more rows }\if{html}{\out{
}} The \code{!} operator negates a selection: \if{html}{\out{
}}\preformatted{starwars \%>\% select(!(name:mass)) #> # A tibble: 87 x 11 #> hair_color skin_color eye_color birth_year sex gender homeworld species #> #> 1 blond fair blue 19 male masculine Tatooine Human #> 2 gold yellow 112 none masculine Tatooine Droid #> 3 white, blue red 33 none masculine Naboo Droid #> 4 none white yellow 41.9 male masculine Tatooine Human #> # i 83 more rows #> # i 3 more variables: films , vehicles , starships iris \%>\% select(!c(Sepal.Length, Petal.Length)) #> # A tibble: 150 x 3 #> Sepal.Width Petal.Width Species #> #> 1 3.5 0.2 setosa #> 2 3 0.2 setosa #> 3 3.2 0.2 setosa #> 4 3.1 0.2 setosa #> # i 146 more rows iris \%>\% select(!ends_with("Width")) #> # A tibble: 150 x 3 #> Sepal.Length Petal.Length Species #> #> 1 5.1 1.4 setosa #> 2 4.9 1.4 setosa #> 3 4.7 1.3 setosa #> 4 4.6 1.5 setosa #> # i 146 more rows }\if{html}{\out{
}} \code{&} and \code{|} take the intersection or the union of two selections: \if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & ends_with("Width")) #> # A tibble: 150 x 1 #> Petal.Width #> #> 1 0.2 #> 2 0.2 #> 3 0.2 #> 4 0.2 #> # i 146 more rows iris \%>\% select(starts_with("Petal") | ends_with("Width")) #> # A tibble: 150 x 3 #> Petal.Length Petal.Width Sepal.Width #> #> 1 1.4 0.2 3.5 #> 2 1.4 0.2 3 #> 3 1.3 0.2 3.2 #> 4 1.5 0.2 3.1 #> # i 146 more rows }\if{html}{\out{
}} To take the difference between two selections, combine the \code{&} and \code{!} operators: \if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & !ends_with("Width")) #> # A tibble: 150 x 1 #> Petal.Length #> #> 1 1.4 #> 2 1.4 #> 3 1.3 #> 4 1.5 #> # i 146 more rows }\if{html}{\out{
}} } } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{reframe}()}, \code{\link{rename}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/tbl_df.Rd0000644000176200001440000000101714266276767013726 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-tibble.R \name{tbl_df} \alias{tbl_df} \alias{as.tbl} \title{Coerce to a tibble} \usage{ tbl_df(data) as.tbl(x, ...) } \arguments{ \item{data, x}{Object to coerce} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{\link[tibble:as_tibble]{tibble::as_tibble()}} instead. } \keyword{internal} dplyr/man/nth.Rd0000644000176200001440000000540214366556340013254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nth-value.R \name{nth} \alias{nth} \alias{first} \alias{last} \title{Extract the first, last, or nth value from a vector} \usage{ nth(x, n, order_by = NULL, default = NULL, na_rm = FALSE) first(x, order_by = NULL, default = NULL, na_rm = FALSE) last(x, order_by = NULL, default = NULL, na_rm = FALSE) } \arguments{ \item{x}{A vector} \item{n}{For \code{nth()}, a single integer specifying the position. Negative integers index from the end (i.e. \code{-1L} will return the last value in the vector).} \item{order_by}{An optional vector the same size as \code{x} used to determine the order.} \item{default}{A default value to use if the position does not exist in \code{x}. If \code{NULL}, the default, a missing value is used. If supplied, this must be a single value, which will be cast to the type of \code{x}. When \code{x} is a list , \code{default} is allowed to be any value. 
There are no type or size restrictions in this case.} \item{na_rm}{Should missing values in \code{x} be removed before extracting the value?} } \value{ If \code{x} is a list, a single element from that list. Otherwise, a vector the same type as \code{x} with size 1. } \description{ These are useful helpers for extracting a single value from a vector. They are guaranteed to return a meaningful value, even when the input is shorter than expected. You can also provide an optional secondary vector that defines the ordering. } \details{ For most vector types, \code{first(x)}, \code{last(x)}, and \code{nth(x, n)} work like \code{x[[1]]}, \verb{x[[length(x)]}, and \code{x[[n]]}, respectively. The primary exception is data frames, where they instead retrieve rows, i.e. \code{x[1, ]}, \code{x[nrow(x), ]}, and \code{x[n, ]}. This is consistent with the tidyverse/vctrs principle which treats data frames as a vector of rows, rather than a vector of columns. } \examples{ x <- 1:10 y <- 10:1 first(x) last(y) nth(x, 1) nth(x, 5) nth(x, -2) # `first()` and `last()` are often useful in `summarise()` df <- tibble(x = x, y = y) df \%>\% summarise( across(x:y, first, .names = "{col}_first"), y_last = last(y) ) # Selecting a position that is out of bounds returns a default value nth(x, 11) nth(x, 0) # This out of bounds behavior also applies to empty vectors first(integer()) # You can customize the default value with `default` nth(x, 11, default = -1L) first(integer(), default = 0L) # `order_by` provides optional ordering last(x) last(x, order_by = y) # `na_rm` removes missing values before extracting the value z <- c(NA, NA, 1, 3, NA, 5, NA) first(z) first(z, na_rm = TRUE) last(z, na_rm = TRUE) nth(z, 3, na_rm = TRUE) # For data frames, these select entire rows df <- tibble(a = 1:5, b = 6:10) first(df) nth(df, 4) } dplyr/man/same_src.Rd0000644000176200001440000000062314366556340014257 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/src.R 
\name{same_src} \alias{same_src} \title{Figure out if two sources are the same (or two tbl have the same source)} \usage{ same_src(x, y) } \arguments{ \item{x, y}{src or tbls to test} } \value{ a logical flag } \description{ Figure out if two sources are the same (or two tbl have the same source) } \keyword{internal} dplyr/man/lead-lag.Rd0000644000176200001440000000335714366556340014140 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lead-lag.R \name{lead-lag} \alias{lead-lag} \alias{lag} \alias{lead} \title{Compute lagged or leading values} \usage{ lag(x, n = 1L, default = NULL, order_by = NULL, ...) lead(x, n = 1L, default = NULL, order_by = NULL, ...) } \arguments{ \item{x}{A vector} \item{n}{Positive integer of length 1, giving the number of positions to lag or lead by} \item{default}{The value used to pad \code{x} back to its original size after the lag or lead has been applied. The default, \code{NULL}, pads with a missing value. If supplied, this must be a vector with size 1, which will be cast to the type of \code{x}.} \item{order_by}{An optional secondary vector that defines the ordering to use when applying the lag or lead to \code{x}. If supplied, this must be the same size as \code{x}.} \item{...}{Not used.} } \value{ A vector with the same type and size as \code{x}. } \description{ Find the "previous" (\code{lag()}) or "next" (\code{lead()}) values in a vector. Useful for comparing values behind of or ahead of the current values. 
} \examples{ lag(1:5) lead(1:5) x <- 1:5 tibble(behind = lag(x), x, ahead = lead(x)) # If you want to look more rows behind or ahead, use `n` lag(1:5, n = 1) lag(1:5, n = 2) lead(1:5, n = 1) lead(1:5, n = 2) # If you want to define a value to pad with, use `default` lag(1:5) lag(1:5, default = 0) lead(1:5) lead(1:5, default = 6) # If the data are not already ordered, use `order_by` scrambled <- slice_sample( tibble(year = 2000:2005, value = (0:5) ^ 2), prop = 1 ) wrong <- mutate(scrambled, previous_year_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, previous_year_value = lag(value, order_by = year)) arrange(right, year) } dplyr/man/storms.Rd0000644000176200001440000000363614525503021014002 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data-storms.R \docType{data} \name{storms} \alias{storms} \title{Storm tracks data} \format{ A tibble with 19,537 observations and 13 variables: \describe{ \item{name}{Storm Name} \item{year,month,day}{Date of report} \item{hour}{Hour of report (in UTC)} \item{lat,long}{Location of storm center} \item{status}{Storm classification (Tropical Depression, Tropical Storm, or Hurricane)} \item{category}{Saffir-Simpson hurricane category calculated from wind speed. \itemize{ \item \code{NA}: Not a hurricane \item 1: 64+ knots \item 2: 83+ knots \item 3: 96+ knots \item 4: 113+ knots \item 5: 137+ knots } } \item{wind}{storm's maximum sustained wind speed (in knots)} \item{pressure}{Air pressure at the storm's center (in millibars)} \item{tropicalstorm_force_diameter}{Diameter (in nautical miles) of the area experiencing tropical storm strength winds (34 knots or above). Only available starting in 2004.} \item{hurricane_force_diameter}{Diameter (in nautical miles) of the area experiencing hurricane strength winds (64 knots or above). 
Only available starting in 2004.} } } \usage{ storms } \description{ This dataset is the NOAA Atlantic hurricane database best track data, \url{https://www.nhc.noaa.gov/data/#hurdat}. The data includes the positions and attributes of storms from 1975-2022. Storms from 1979 onward are measured every six hours during the lifetime of the storm. Storms in earlier years have some missing data. } \examples{ storms # Show a few recent storm paths if (requireNamespace("ggplot2", quietly = TRUE)) { library(ggplot2) storms \%>\% filter(year >= 2000) \%>\% ggplot(aes(long, lat, color = paste(year, name))) + geom_path(show.legend = FALSE) + facet_wrap(~year) } storms } \seealso{ The script to create the storms data set: \url{https://github.com/tidyverse/dplyr/blob/main/data-raw/storms.R} } \keyword{datasets} dplyr/man/auto_copy.Rd0000644000176200001440000000125314366556340014465 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/copy-to.R \name{auto_copy} \alias{auto_copy} \title{Copy tables to same source, if necessary} \usage{ auto_copy(x, y, copy = FALSE, ...) } \arguments{ \item{x, y}{\code{y} will be copied to \code{x}, if necessary.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. 
This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{...}{Other arguments passed on to methods.} } \description{ Copy tables to same source, if necessary } dplyr/man/scoped.Rd0000644000176200001440000001221614366556340013741 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{scoped} \alias{scoped} \title{Operate on a selection of variables} \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=pick]{pick()}} or \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. The variants suffixed with \verb{_if}, \verb{_at} or \verb{_all} apply an expression (sometimes several) to all variables within a specified subset. 
This subset can contain all variables (\verb{_all} variants), a \code{\link[=vars]{vars()}} selection (\verb{_at} variants), or variables selected with a predicate (\verb{_if} variants). The verbs with scoped variants are: \itemize{ \item \code{\link[=mutate]{mutate()}}, \code{\link[=transmute]{transmute()}} and \code{\link[=summarise]{summarise()}}. See \code{\link[=summarise_all]{summarise_all()}}. \item \code{\link[=filter]{filter()}}. See \code{\link[=filter_all]{filter_all()}}. \item \code{\link[=group_by]{group_by()}}. See \code{\link[=group_by_all]{group_by_all()}}. \item \code{\link[=rename]{rename()}} and \code{\link[=select]{select()}}. See \code{\link[=select_all]{select_all()}}. \item \code{\link[=arrange]{arrange()}}. See \code{\link[=arrange_all]{arrange_all()}} } There are three kinds of scoped variants. They differ in the scope of the variable selection on which operations are applied: \itemize{ \item Verbs suffixed with \verb{_all()} apply an operation on all variables. \item Verbs suffixed with \verb{_at()} apply an operation on a subset of variables specified with the quoting function \code{\link[=vars]{vars()}}. This quoting function accepts \code{\link[tidyselect:vars_select]{tidyselect::vars_select()}} helpers like \code{\link[=starts_with]{starts_with()}}. Instead of a \code{\link[=vars]{vars()}} selection, you can also supply an \link[rlang:is_integerish]{integerish} vector of column positions or a character vector of column names. \item Verbs suffixed with \verb{_if()} apply an operation on the subset of variables for which a predicate function returns \code{TRUE}. Instead of a predicate function, you can also supply a logical vector. } } \section{Grouping variables}{ Most of these operations also apply on the grouping variables when they are part of the selection. 
This includes: \itemize{ \item \code{\link[=arrange_all]{arrange_all()}}, \code{\link[=arrange_at]{arrange_at()}}, and \code{\link[=arrange_if]{arrange_if()}} \item \code{\link[=distinct_all]{distinct_all()}}, \code{\link[=distinct_at]{distinct_at()}}, and \code{\link[=distinct_if]{distinct_if()}} \item \code{\link[=filter_all]{filter_all()}}, \code{\link[=filter_at]{filter_at()}}, and \code{\link[=filter_if]{filter_if()}} \item \code{\link[=group_by_all]{group_by_all()}}, \code{\link[=group_by_at]{group_by_at()}}, and \code{\link[=group_by_if]{group_by_if()}} \item \code{\link[=select_all]{select_all()}}, \code{\link[=select_at]{select_at()}}, and \code{\link[=select_if]{select_if()}} } This is not the case for summarising and mutating variants where operations are \emph{not} applied on grouping variables. The behaviour depends on whether the selection is \strong{implicit} (\code{all} and \code{if} selections) or \strong{explicit} (\code{at} selections). Grouping variables covered by explicit selections (with \code{\link[=summarise_at]{summarise_at()}}, \code{\link[=mutate_at]{mutate_at()}}, and \code{\link[=transmute_at]{transmute_at()}}) are always an error. For implicit selections, the grouping variables are always ignored. In this case, the level of verbosity depends on the kind of operation: \itemize{ \item Summarising operations (\code{\link[=summarise_all]{summarise_all()}} and \code{\link[=summarise_if]{summarise_if()}}) ignore grouping variables silently because it is obvious that operations are not applied on grouping variables. \item On the other hand it isn't as obvious in the case of mutating operations (\code{\link[=mutate_all]{mutate_all()}}, \code{\link[=mutate_if]{mutate_if()}}, \code{\link[=transmute_all]{transmute_all()}}, and \code{\link[=transmute_if]{transmute_if()}}). For this reason, they issue a message indicating which grouping variables are ignored. 
} } dplyr/man/src.Rd0000644000176200001440000000115614366556340013254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/src.R \name{src} \alias{src} \alias{is.src} \title{Create a "src" object} \usage{ src(subclass, ...) is.src(x) } \arguments{ \item{subclass}{name of subclass. "src" is an abstract base class, so you must supply this value. \code{src_} is automatically prepended to the class name} \item{...}{fields used by object. These dots are evaluated with \link[rlang:list2]{explicit splicing}.} \item{x}{object to test for "src"-ness.} } \description{ \code{src()} is the standard constructor for srcs and \code{is.src()} tests. } \keyword{internal} dplyr/man/desc.Rd0000644000176200001440000000077714366556340013413 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/desc.R \name{desc} \alias{desc} \title{Descending order} \usage{ desc(x) } \arguments{ \item{x}{vector to transform} } \description{ Transform a vector into a format that will be sorted in descending order. This is useful within \code{\link[=arrange]{arrange()}}. 
} \examples{ desc(1:10) desc(factor(letters)) first_day <- seq(as.Date("1910/1/1"), as.Date("1920/1/1"), "years") desc(first_day) starwars \%>\% arrange(desc(mass)) } dplyr/man/dplyr-package.Rd0000644000176200001440000000203214406402754015174 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr.R \docType{package} \name{dplyr-package} \alias{dplyr} \alias{dplyr-package} \title{dplyr: A Grammar of Data Manipulation} \description{ To learn more about dplyr, start with the vignettes: \code{browseVignettes(package = "dplyr")} } \seealso{ Useful links: \itemize{ \item \url{https://dplyr.tidyverse.org} \item \url{https://github.com/tidyverse/dplyr} \item Report bugs at \url{https://github.com/tidyverse/dplyr/issues} } } \author{ \strong{Maintainer}: Hadley Wickham \email{hadley@posit.co} (\href{https://orcid.org/0000-0003-4757-117X}{ORCID}) Authors: \itemize{ \item Romain François (\href{https://orcid.org/0000-0002-2444-4226}{ORCID}) \item Lionel Henry \item Kirill Müller (\href{https://orcid.org/0000-0002-1416-3412}{ORCID}) \item Davis Vaughan \email{davis@posit.co} (\href{https://orcid.org/0000-0003-4777-038X}{ORCID}) } Other contributors: \itemize{ \item Posit Software, PBC [copyright holder, funder] } } \keyword{internal} dplyr/man/arrange.Rd0000644000176200001440000000751014406402754014076 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/arrange.R \name{arrange} \alias{arrange} \alias{arrange.data.frame} \title{Order rows using column values} \usage{ arrange(.data, ..., .by_group = FALSE) \method{arrange}{data.frame}(.data, ..., .by_group = FALSE, .locale = NULL) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables, or functions of variables. 
Use \code{\link[=desc]{desc()}} to sort a variable in descending order.} \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to grouped data frames only.} \item{.locale}{The locale to sort character vectors in. \itemize{ \item If \code{NULL}, the default, uses the \code{"C"} locale unless the \code{dplyr.legacy_locale} global option escape hatch is active. See the \link{dplyr-locale} help page for more details. \item If a single string from \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} is supplied, then this will be used as the locale to sort with. For example, \code{"en"} will sort with the American English locale. This requires the stringi package. \item If \code{"C"} is supplied, then character vectors will always be sorted in the C locale. This does not require stringi and is often much faster than supplying a locale identifier. } The C locale is not the same as English locales, such as \code{"en"}, particularly when it comes to data containing a mix of upper and lower case letters. This is explained in more detail on the \link[=dplyr-locale]{locale} help page under the \verb{Default locale} section.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item All rows appear in the output, but (usually) in a different place. \item Columns are not modified. \item Groups are not modified. \item Data frame attributes are preserved. } } \description{ \code{arrange()} orders the rows of a data frame by the values of selected columns. Unlike other dplyr verbs, \code{arrange()} largely ignores grouping; you need to explicitly mention grouping variables (or use \code{.by_group = TRUE}) in order to group by them, and functions of variables are evaluated once per data frame, not once per group. 
} \details{ \subsection{Missing values}{ Unlike base sorting with \code{sort()}, \code{NA} are: \itemize{ \item always sorted to the end for local data, even when wrapped with \code{desc()}. \item treated differently for remote data, depending on the backend. } } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("arrange")}. } \examples{ arrange(mtcars, cyl, disp) arrange(mtcars, desc(disp)) # grouped arrange ignores groups by_cyl <- mtcars \%>\% group_by(cyl) by_cyl \%>\% arrange(desc(wt)) # Unless you specifically ask: by_cyl \%>\% arrange(desc(wt), .by_group = TRUE) # use embracing when wrapping in a function; # see ?rlang::args_data_masking for more details tidy_eval_arrange <- function(.data, var) { .data \%>\% arrange({{ var }}) } tidy_eval_arrange(mtcars, mpg) # Use `across()` or `pick()` to select columns with tidy-select iris \%>\% arrange(pick(starts_with("Sepal"))) iris \%>\% arrange(across(starts_with("Sepal"), desc)) } \seealso{ Other single table verbs: \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{reframe}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/nest_by.Rd0000644000176200001440000000745414366556340014137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nest-by.R \name{nest_by} \alias{nest_by} \title{Nest by one or more variables} \usage{ nest_by(.data, ..., .key = "data", .keep = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). 
See \emph{Methods}, below, for more details.} \item{...}{In \code{group_by()}, variables or computations to group by. Computations are always done on the ungrouped data frame. To perform computations on the grouped data, you need to use a separate \code{mutate()} step before the \code{group_by()}. Computations are not allowed in \code{nest_by()}. In \code{ungroup()}, variables to remove from the grouping.} \item{.key}{Name of the list column} \item{.keep}{Should the grouping columns be kept in the list column.} } \value{ A \link{rowwise} data frame. The output has the following properties: \itemize{ \item The rows come from the underlying \code{\link[=group_keys]{group_keys()}}. \item The columns are the grouping keys plus one list-column of data frames. \item Data frame attributes are \strong{not} preserved, because \code{nest_by()} fundamentally creates a new data frame. } A tbl with one row per unique combination of the grouping variables. The first columns are the grouping variables, followed by a list column of tibbles with matching rows of the remaining columns. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{nest_by()} is closely related to \code{\link[=group_by]{group_by()}}. However, instead of storing the group structure in the metadata, it is made explicit in the data, giving each group key a single row along with a list-column of data frames that contain all the other data. \code{nest_by()} returns a \link{rowwise} data frame, which makes operations on the grouped data particularly elegant. See \code{vignette("rowwise")} for more details. } \details{ Note that \code{df \%>\% nest_by(x, y)} is roughly equivalent to \if{html}{\out{
}}\preformatted{df \%>\% group_by(x, y) \%>\% summarise(data = list(pick(everything()))) \%>\% rowwise() }\if{html}{\out{
}} If you want to unnest a nested data frame, you can either use \code{tidyr::unnest()} or take advantage of \code{reframe()}'s multi-row behaviour: \if{html}{\out{
}}\preformatted{nested \%>\% reframe(data) }\if{html}{\out{
}} } \section{Lifecycle}{ \code{nest_by()} is not stable because \code{\link[tidyr:nest]{tidyr::nest(.by =)}} provides very similar behavior. It may be deprecated in the future. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_by")}. } \examples{ # After nesting, you get one row per group iris \%>\% nest_by(Species) starwars \%>\% nest_by(species) # The output is grouped by row, which makes modelling particularly easy models <- mtcars \%>\% nest_by(cyl) \%>\% mutate(model = list(lm(mpg ~ wt, data = data))) models models \%>\% summarise(rsq = summary(model)$r.squared) \dontshow{if (requireNamespace("broom", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # This is particularly elegant with the broom functions models \%>\% summarise(broom::glance(model)) models \%>\% reframe(broom::tidy(model)) \dontshow{\}) # examplesIf} # Note that you can also `reframe()` to unnest the data models \%>\% reframe(data) } \keyword{internal} dplyr/man/rowwise.Rd0000644000176200001440000000535114366556340014165 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rowwise.R \name{rowwise} \alias{rowwise} \title{Group input by rows} \usage{ rowwise(data, ...) } \arguments{ \item{data}{Input data frame.} \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Variables to be preserved when calling \code{\link[=summarise]{summarise()}}. This is typically a set of variables whose combination uniquely identify each row. \strong{NB}: unlike \code{group_by()} you can not create new variables here but instead you can select multiple variables with (e.g.) 
\code{everything()}.} } \value{ A row-wise data frame with class \code{rowwise_df}. Note that a \code{rowwise_df} is implicitly grouped by row, but is not a \code{grouped_df}. } \description{ \code{rowwise()} allows you to compute on a data frame a row-at-a-time. This is most useful when a vectorised function doesn't exist. Most dplyr verbs preserve row-wise grouping. The exception is \code{\link[=summarise]{summarise()}}, which return a \link{grouped_df}. You can explicitly ungroup with \code{\link[=ungroup]{ungroup()}} or \code{\link[=as_tibble]{as_tibble()}}, or convert to a \link{grouped_df} with \code{\link[=group_by]{group_by()}}. } \section{List-columns}{ Because a rowwise has exactly one row per group it offers a small convenience for working with list-columns. Normally, \code{summarise()} and \code{mutate()} extract a groups worth of data with \code{[}. But when you index a list in this way, you get back another list. When you're working with a \code{rowwise} tibble, then dplyr will use \code{[[} instead of \code{[} to make your life a little easier. } \examples{ df <- tibble(x = runif(6), y = runif(6), z = runif(6)) # Compute the mean of x, y, z in each row df \%>\% rowwise() \%>\% mutate(m = mean(c(x, y, z))) # use c_across() to more easily select many variables df \%>\% rowwise() \%>\% mutate(m = mean(c_across(x:z))) # Compute the minimum of x and y in each row df \%>\% rowwise() \%>\% mutate(m = min(c(x, y, z))) # In this case you can use an existing vectorised function: df \%>\% mutate(m = pmin(x, y, z)) # Where these functions exist they'll be much faster than rowwise # so be on the lookout for them. 
# rowwise() is also useful when doing simulations params <- tribble( ~sim, ~n, ~mean, ~sd, 1, 1, 1, 1, 2, 2, 2, 4, 3, 3, -1, 2 ) # Here I supply variables to preserve after the computation params \%>\% rowwise(sim) \%>\% reframe(z = rnorm(n, mean, sd)) # If you want one row per simulation, put the results in a list() params \%>\% rowwise(sim) \%>\% summarise(z = list(rnorm(n, mean, sd)), .groups = "keep") } \seealso{ \code{\link[=nest_by]{nest_by()}} for a convenient way of creating rowwise data frames with nested data. } dplyr/man/deprec-context.Rd0000644000176200001440000000142514366556340015410 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-context.R \name{deprec-context} \alias{deprec-context} \alias{cur_data} \alias{cur_data_all} \title{Information about the "current" group or variable} \usage{ cur_data() cur_data_all() } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions were deprecated in dplyr 1.1.0. \itemize{ \item \code{cur_data()} is deprecated in favor of \code{\link[=pick]{pick()}}. \item \code{cur_data_all()} is deprecated but does not have a direct replacement as selecting the grouping variables is not well-defined and is unlikely to ever be useful. } } \keyword{internal} dplyr/man/case_match.Rd0000644000176200001440000000733014525503021014535 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/case-match.R \name{case_match} \alias{case_match} \title{A general vectorised \code{switch()}} \usage{ case_match(.x, ..., .default = NULL, .ptype = NULL) } \arguments{ \item{.x}{A vector to match against.} \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A sequence of two-sided formulas: \code{old_values ~ new_value}. 
The right hand side (RHS) determines the output value for all values of \code{.x} that match the left hand side (LHS). The LHS must evaluate to the same type of vector as \code{.x}. It can be any length, allowing you to map multiple \code{.x} values to the same RHS value. If a value is repeated in the LHS, i.e. a value in \code{.x} matches to multiple cases, the first match is used. The RHS inputs will be coerced to their common type. Each RHS input will be \link[vctrs:theory-faq-recycling]{recycled} to the size of \code{.x}.} \item{.default}{The value used when values in \code{.x} aren't matched by any of the LHS inputs. If \code{NULL}, the default, a missing value will be used. \code{.default} is \link[vctrs:theory-faq-recycling]{recycled} to the size of \code{.x}.} \item{.ptype}{An optional prototype declaring the desired output type. If not supplied, the output type will be taken from the common type of all RHS inputs and \code{.default}.} } \value{ A vector with the same size as \code{.x} and the same type as the common type of the RHS inputs and \code{.default} (if not overridden by \code{.ptype}). } \description{ This function allows you to vectorise multiple \code{\link[=switch]{switch()}} statements. Each case is evaluated sequentially and the first match for each element determines the corresponding value in the output vector. If no cases match, the \code{.default} is used. \code{case_match()} is an R equivalent of the SQL "simple" \verb{CASE WHEN} statement. \subsection{Connection to \code{case_when()}}{ While \code{\link[=case_when]{case_when()}} uses logical expressions on the left-hand side of the formula, \code{case_match()} uses values to match against \code{.x} with. The following two statements are roughly equivalent: \if{html}{\out{
}}\preformatted{case_when( x \%in\% c("a", "b") ~ 1, x \%in\% "c" ~ 2, x \%in\% c("d", "e") ~ 3 ) case_match( x, c("a", "b") ~ 1, "c" ~ 2, c("d", "e") ~ 3 ) }\if{html}{\out{
}} } } \examples{ x <- c("a", "b", "a", "d", "b", NA, "c", "e") # `case_match()` acts like a vectorized `switch()`. # Unmatched values "fall through" as a missing value. case_match( x, "a" ~ 1, "b" ~ 2, "c" ~ 3, "d" ~ 4 ) # Missing values can be matched exactly, and `.default` can be used to # control the value used for unmatched values of `.x` case_match( x, "a" ~ 1, "b" ~ 2, "c" ~ 3, "d" ~ 4, NA ~ 0, .default = 100 ) # Input values can be grouped into the same expression to map them to the # same output value case_match( x, c("a", "b") ~ "low", c("c", "d", "e") ~ "high" ) # `case_match()` isn't limited to character input: y <- c(1, 2, 1, 3, 1, NA, 2, 4) case_match( y, c(1, 3) ~ "odd", c(2, 4) ~ "even", .default = "missing" ) # Setting `.default` to the original vector is a useful way to replace # selected values, leaving everything else as is case_match(y, NA ~ 0, .default = y) starwars \%>\% mutate( # Replace missings, but leave everything else alone hair_color = case_match(hair_color, NA ~ "unknown", .default = hair_color), # Replace some, but not all, of the species species = case_match( species, "Human" ~ "Humanoid", "Droid" ~ "Robot", c("Wookiee", "Ewok") ~ "Hairy", .default = species ), .keep = "used" ) } \seealso{ \code{\link[=case_when]{case_when()}} } dplyr/man/dplyr_extending.Rd0000644000176200001440000001245414406402754015661 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/generics.R \name{dplyr_extending} \alias{dplyr_extending} \alias{dplyr_row_slice} \alias{dplyr_col_modify} \alias{dplyr_reconstruct} \title{Extending dplyr with new data frame subclasses} \usage{ dplyr_row_slice(data, i, ...) dplyr_col_modify(data, cols) dplyr_reconstruct(data, template) } \arguments{ \item{data}{A tibble. We use tibbles because they avoid some inconsistent subset-assignment use cases.} \item{i}{A numeric or logical vector that indexes the rows of \code{data}.} \item{cols}{A named list used to modify columns. 
A \code{NULL} value should remove an existing column.} \item{template}{Template data frame to use for restoring attributes.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} These three functions, along with \verb{names<-} and 1d numeric \code{[} (i.e. \code{x[loc]}) methods, provide a minimal interface for extending dplyr to work with new data frame subclasses. This means that for simple cases you should only need to provide a couple of methods, rather than a method for every dplyr verb. These functions are a stop-gap measure until we figure out how to solve the problem more generally, but it's likely that any code you write to implement them will find a home in what comes next. } \section{Basic advice}{ This section gives you basic advice if you want to extend dplyr to work with your custom data frame subclass, and you want the dplyr methods to behave in basically the same way. \itemize{ \item If you have data frame attributes that don't depend on the rows or columns (and should unconditionally be preserved), you don't need to do anything. The one exception to this is if your subclass extends a data.frame directly rather than extending a tibble. The \verb{[.data.frame} method does not preserve attributes, so you'll need to write a \code{[} method for your subclass that preserves attributes important for your class. \item If you have \strong{scalar} attributes that depend on \strong{rows}, implement a \code{dplyr_reconstruct()} method. Your method should recompute the attribute depending on rows now present. \item If you have \strong{scalar} attributes that depend on \strong{columns}, implement a \code{dplyr_reconstruct()} method and a 1d \code{[} method. For example, if your class requires that certain columns be present, your method should return a data.frame or tibble when those columns are removed. 
\item If your attributes are \strong{vectorised} over \strong{rows}, implement a \code{dplyr_row_slice()} method. This gives you access to \code{i} so you can modify the row attribute accordingly. You'll also need to think carefully about how to recompute the attribute in \code{dplyr_reconstruct()}, and you will need to carefully verify the behaviour of each verb, and provide additional methods as needed. \item If your attributes are \strong{vectorised} over \strong{columns}, implement \code{dplyr_col_modify()}, 1d \code{[}, and \verb{names<-} methods. All of these methods know which columns are being modified, so you can update the column attribute accordingly. You'll also need to think carefully about how to recompute the attribute in \code{dplyr_reconstruct()}, and you will need to carefully verify the behaviour of each verb, and provide additional methods as needed. } } \section{Current usage}{ \itemize{ \item \code{arrange()}, \code{filter()}, \code{slice()} (and the rest of the \verb{slice_*()} family), \code{semi_join()}, and \code{anti_join()} work by generating a vector of row indices, and then subsetting with \code{dplyr_row_slice()}. \item \code{mutate()} generates a list of new column values (using \code{NULL} to indicate when columns should be deleted), then passes that to \code{dplyr_col_modify()}. It also uses 1d \code{[} to implement \code{.keep}, and will call \code{relocate()} if either \code{.before} or \code{.after} are supplied. \item \code{summarise()} and \code{reframe()} work similarly to \code{mutate()} but the data modified by \code{dplyr_col_modify()} comes from \code{group_data()} or is built from \code{.by}. \item \code{select()} uses 1d \code{[} to select columns, then \verb{names<-} to rename them. \code{rename()} just uses \verb{names<-}. \code{relocate()} just uses 1d \code{[}.
\item \code{inner_join()}, \code{left_join()}, \code{right_join()}, and \code{full_join()} coerce \code{x} to a tibble, modify the rows, then use \code{dplyr_reconstruct()} to convert back to the same type as \code{x}. \item \code{nest_join()} converts both \code{x} and \code{y} to tibbles, modifies the rows, and uses \code{dplyr_col_modify()} to handle modified key variables and the list-column that \code{y} becomes. It also uses \code{dplyr_reconstruct()} to convert the outer result back to the type of \code{x}, and to convert the nested tibbles back to the type of \code{y}. \item \code{distinct()} does a \code{mutate()} if any expressions are present, then uses 1d \code{[} to select variables to keep, then \code{dplyr_row_slice()} to select distinct rows. } Note that \code{group_by()} and \code{ungroup()} don't use any of these generics and you'll need to provide methods for them directly, or rely on \code{.by} for per-operation grouping. } \keyword{internal} dplyr/man/if_else.Rd0000644000176200001440000000457414525503021014063 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/if-else.R \name{if_else} \alias{if_else} \title{Vectorised if-else} \usage{ if_else(condition, true, false, missing = NULL, ..., ptype = NULL, size = NULL) } \arguments{ \item{condition}{A logical vector} \item{true, false}{Vectors to use for \code{TRUE} and \code{FALSE} values of \code{condition}. Both \code{true} and \code{false} will be \link[vctrs:theory-faq-recycling]{recycled} to the size of \code{condition}. \code{true}, \code{false}, and \code{missing} (if used) will be cast to their common type.} \item{missing}{If not \code{NULL}, will be used as the value for \code{NA} values of \code{condition}. Follows the same size and type rules as \code{true} and \code{false}.} \item{...}{These dots are for future extensions and must be empty.} \item{ptype}{An optional prototype declaring the desired output type. 
If supplied, this overrides the common type of \code{true}, \code{false}, and \code{missing}.} \item{size}{An optional size declaring the desired output size. If supplied, this overrides the size of \code{condition}.} } \value{ A vector with the same size as \code{condition} and the same type as the common type of \code{true}, \code{false}, and \code{missing}. Where \code{condition} is \code{TRUE}, the matching values from \code{true}, where it is \code{FALSE}, the matching values from \code{false}, and where it is \code{NA}, the matching values from \code{missing}, if provided, otherwise a missing value will be used. } \description{ \code{if_else()} is a vectorized \link[=if]{if-else}. Compared to the base R equivalent, \code{\link[=ifelse]{ifelse()}}, this function allows you to handle missing values in the \code{condition} with \code{missing} and always takes \code{true}, \code{false}, and \code{missing} into account when determining what the output type should be. } \examples{ x <- c(-5:5, NA) if_else(x < 0, NA, x) # Explicitly handle `NA` values in the `condition` with `missing` if_else(x < 0, "negative", "positive", missing = "missing") # Unlike `ifelse()`, `if_else()` preserves types x <- factor(sample(letters[1:5], 10, replace = TRUE)) ifelse(x \%in\% c("a", "b", "c"), x, NA) if_else(x \%in\% c("a", "b", "c"), x, NA) # `if_else()` is often useful for creating new columns inside of `mutate()` starwars \%>\% mutate(category = if_else(height < 100, "short", "tall"), .keep = "used") } dplyr/man/pick.Rd0000644000176200001440000000564714406402754013416 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pick.R \name{pick} \alias{pick} \title{Select a subset of columns} \usage{ pick(...) } \arguments{ \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to pick. You can't pick grouping columns because they are already automatically handled by the verb (i.e. 
\code{\link[=summarise]{summarise()}} or \code{\link[=mutate]{mutate()}}).} } \value{ A tibble containing the selected columns for the current group. } \description{ \code{pick()} provides a way to easily select a subset of columns from your data using \code{\link[=select]{select()}} semantics while inside a \link[rlang:args_data_masking]{"data-masking"} function like \code{\link[=mutate]{mutate()}} or \code{\link[=summarise]{summarise()}}. \code{pick()} returns a data frame containing the selected columns for the current group. \code{pick()} is complementary to \code{\link[=across]{across()}}: \itemize{ \item With \code{pick()}, you typically apply a function to the full data frame. \item With \code{across()}, you typically apply a function to each column. } } \details{ Theoretically, \code{pick()} is intended to be replaceable with an equivalent call to \code{tibble()}. For example, \code{pick(a, c)} could be replaced with \code{tibble(a = a, c = c)}, and \code{pick(everything())} on a data frame with cols \code{a}, \code{b}, and \code{c} could be replaced with \code{tibble(a = a, b = b, c = c)}. \code{pick()} specially handles the case of an empty selection by returning a 1 row, 0 column tibble, so an exact replacement is more like: \if{html}{\out{
}}\preformatted{size <- vctrs::vec_size_common(..., .absent = 1L) out <- vctrs::vec_recycle_common(..., .size = size) tibble::new_tibble(out, nrow = size) }\if{html}{\out{
}} } \examples{ df <- tibble( x = c(3, 2, 2, 2, 1), y = c(0, 2, 1, 1, 4), z1 = c("a", "a", "a", "b", "a"), z2 = c("c", "d", "d", "a", "c") ) df # `pick()` provides a way to select a subset of your columns using # tidyselect. It returns a data frame. df \%>\% mutate(cols = pick(x, y)) # This is useful for functions that take data frames as inputs. # For example, you can compute a joint rank between `x` and `y`. df \%>\% mutate(rank = dense_rank(pick(x, y))) # `pick()` is also useful as a bridge between data-masking functions (like # `mutate()` or `group_by()`) and functions with tidy-select behavior (like # `select()`). For example, you can use `pick()` to create a wrapper around # `group_by()` that takes a tidy-selection of columns to group on. For more # bridge patterns, see # https://rlang.r-lib.org/reference/topic-data-mask-programming.html#bridge-patterns. my_group_by <- function(data, cols) { group_by(data, pick({{ cols }})) } df \%>\% my_group_by(c(x, starts_with("z"))) # Or you can use it to dynamically select columns to `count()` by df \%>\% count(pick(starts_with("z"))) } \seealso{ \code{\link[=across]{across()}} } dplyr/man/tbl.Rd0000644000176200001440000000063414366556340013246 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbl.R \name{tbl} \alias{tbl} \alias{is.tbl} \title{Create a table from a data source} \usage{ tbl(src, ...) is.tbl(x) } \arguments{ \item{src}{A data source} \item{...}{Other arguments passed on to the individual methods} \item{x}{Any object} } \description{ This is a generic method that dispatches based on the first argument. 
} dplyr/man/dplyr_by.Rd0000644000176200001440000002257014406402754014306 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/by.R \name{dplyr_by} \alias{dplyr_by} \title{Per-operation grouping with \code{.by}/\code{by}} \description{ There are two ways to group in dplyr: \itemize{ \item Persistent grouping with \code{\link[=group_by]{group_by()}} \item Per-operation grouping with \code{.by}/\code{by} } This help page is dedicated to explaining where and why you might want to use the latter. Depending on the dplyr verb, the per-operation grouping argument may be named \code{.by} or \code{by}. The \emph{Supported verbs} section below outlines this on a case-by-case basis. The remainder of this page will refer to \code{.by} for simplicity. Grouping radically affects the computation of the dplyr verb you use it with, and one of the goals of \code{.by} is to allow you to place that grouping specification alongside the code that actually uses it. As an added benefit, with \code{.by} you no longer need to remember to \code{\link[=ungroup]{ungroup()}} after \code{\link[=summarise]{summarise()}}, and \code{summarise()} won't ever message you about how it's handling the groups! This idea comes from \href{https://CRAN.R-project.org/package=data.table}{data.table}, which allows you to specify \code{by} alongside modifications in \code{j}, like: \code{dt[, .(x = mean(x)), by = g]}. 
\subsection{Supported verbs}{ \itemize{ \item \code{\link[=mutate]{mutate(.by = )}} \item \code{\link[=summarise]{summarise(.by = )}} \item \code{\link[=reframe]{reframe(.by = )}} \item \code{\link[=filter]{filter(.by = )}} \item \code{\link[=slice]{slice(.by = )}} \item \code{\link[=slice_head]{slice_head(by = )}} and \code{\link[=slice_tail]{slice_tail(by = )}} \item \code{\link[=slice_min]{slice_min(by = )}} and \code{\link[=slice_max]{slice_max(by = )}} \item \code{\link[=slice_sample]{slice_sample(by = )}} } Note that some dplyr verbs use \code{by} while others use \code{.by}. This is a purely technical difference. } \subsection{Differences between \code{.by} and \code{group_by()}}{\tabular{ll}{ \code{.by} \tab \code{group_by()} \cr Grouping only affects a single verb \tab Grouping is persistent across multiple verbs \cr Selects variables with \link[=dplyr_tidy_select]{tidy-select} \tab Computes expressions with \link[rlang:args_data_masking]{data-masking} \cr Summaries use existing order of group keys \tab Summaries sort group keys in ascending order \cr } } \subsection{Using \code{.by}}{ Let's take a look at the two grouping approaches using this \code{expenses} data set, which tracks costs accumulated across various \code{id}s and \code{region}s: \if{html}{\out{
}}\preformatted{expenses <- tibble( id = c(1, 2, 1, 3, 1, 2, 3), region = c("A", "A", "A", "B", "B", "A", "A"), cost = c(25, 20, 19, 12, 9, 6, 6) ) expenses #> # A tibble: 7 x 3 #> id region cost #> #> 1 1 A 25 #> 2 2 A 20 #> 3 1 A 19 #> 4 3 B 12 #> 5 1 B 9 #> 6 2 A 6 #> 7 3 A 6 }\if{html}{\out{
}} Imagine that you wanted to compute the average cost per region. You'd probably write something like this: \if{html}{\out{
}}\preformatted{expenses \%>\% group_by(region) \%>\% summarise(cost = mean(cost)) #> # A tibble: 2 x 2 #> region cost #> #> 1 A 15.2 #> 2 B 10.5 }\if{html}{\out{
}} Instead, you can now specify the grouping \emph{inline} within the verb: \if{html}{\out{
}}\preformatted{expenses \%>\% summarise(cost = mean(cost), .by = region) #> # A tibble: 2 x 2 #> region cost #> #> 1 A 15.2 #> 2 B 10.5 }\if{html}{\out{
}} \code{.by} applies to a single operation, meaning that since \code{expenses} was an ungrouped data frame, the result after applying \code{.by} will also always be an ungrouped data frame, regardless of the number of grouping columns. \if{html}{\out{
}}\preformatted{expenses \%>\% summarise(cost = mean(cost), .by = c(id, region)) #> # A tibble: 5 x 3 #> id region cost #> #> 1 1 A 22 #> 2 2 A 13 #> 3 3 B 12 #> 4 1 B 9 #> 5 3 A 6 }\if{html}{\out{
}} Compare that with \code{group_by() \%>\% summarise()}, where \code{summarise()} generally peels off 1 layer of grouping by default, typically with a message that it is doing so: \if{html}{\out{
}}\preformatted{expenses \%>\% group_by(id, region) \%>\% summarise(cost = mean(cost)) #> `summarise()` has grouped output by 'id'. You can override using the `.groups` #> argument. #> # A tibble: 5 x 3 #> # Groups: id [3] #> id region cost #> #> 1 1 A 22 #> 2 1 B 9 #> 3 2 A 13 #> 4 3 A 6 #> 5 3 B 12 }\if{html}{\out{
}} Because \code{.by} grouping applies to a single operation, you don't need to worry about ungrouping, and it never needs to emit a message to remind you what it is doing with the groups. Note that with \code{.by} we specified multiple columns to group by using the \link[=dplyr_tidy_select]{tidy-select} syntax \code{c(id, region)}. If you have a character vector of column names you'd like to group by, you can do so with \code{.by = all_of(my_cols)}. It will group by the columns in the order they were provided. To prevent surprising results, you can't use \code{.by} on an existing grouped data frame: \if{html}{\out{
}}\preformatted{expenses \%>\% group_by(id) \%>\% summarise(cost = mean(cost), .by = c(id, region)) #> Error in `summarise()`: #> ! Can't supply `.by` when `.data` is a grouped data frame. }\if{html}{\out{
}} So far we've focused on the usage of \code{.by} with \code{summarise()}, but \code{.by} works with a number of other dplyr verbs. For example, you could append the mean cost per region onto the original data frame as a new column rather than computing a summary: \if{html}{\out{
}}\preformatted{expenses \%>\% mutate(cost_by_region = mean(cost), .by = region) #> # A tibble: 7 x 4 #> id region cost cost_by_region #> #> 1 1 A 25 15.2 #> 2 2 A 20 15.2 #> 3 1 A 19 15.2 #> 4 3 B 12 10.5 #> 5 1 B 9 10.5 #> 6 2 A 6 15.2 #> 7 3 A 6 15.2 }\if{html}{\out{
}} Or you could slice out the maximum cost per combination of id and region: \if{html}{\out{
}}\preformatted{# Note that the argument is named `by` in `slice_max()` expenses \%>\% slice_max(cost, n = 1, by = c(id, region)) #> # A tibble: 5 x 3 #> id region cost #> #> 1 1 A 25 #> 2 2 A 20 #> 3 3 B 12 #> 4 1 B 9 #> 5 3 A 6 }\if{html}{\out{
}} } \subsection{Result ordering}{ When used with \code{.by}, \code{summarise()}, \code{reframe()}, and \code{slice()} all maintain the ordering of the existing data. This is different from \code{group_by()}, which has always sorted the group keys in ascending order. \if{html}{\out{
}}\preformatted{df <- tibble( month = c("jan", "jan", "feb", "feb", "mar"), temp = c(20, 25, 18, 20, 40) ) # Uses ordering by "first appearance" in the original data df \%>\% summarise(average_temp = mean(temp), .by = month) #> # A tibble: 3 x 2 #> month average_temp #> #> 1 jan 22.5 #> 2 feb 19 #> 3 mar 40 # Sorts in ascending order df \%>\% group_by(month) \%>\% summarise(average_temp = mean(temp)) #> # A tibble: 3 x 2 #> month average_temp #> #> 1 feb 19 #> 2 jan 22.5 #> 3 mar 40 }\if{html}{\out{
}} If you need sorted group keys, we recommend that you explicitly use \code{\link[=arrange]{arrange()}} either before or after the call to \code{summarise()}, \code{reframe()}, or \code{slice()}. This also gives you full access to all of \code{arrange()}'s features, such as \code{desc()} and the \code{.locale} argument. } \subsection{Verbs without \code{.by} support}{ If a dplyr verb doesn't support \code{.by}, then that typically means that the verb isn't inherently affected by grouping. For example, \code{\link[=pull]{pull()}} and \code{\link[=rename]{rename()}} don't support \code{.by}, because specifying columns to group by would not affect their implementations. That said, there are a few exceptions to this where sometimes a dplyr verb doesn't support \code{.by}, but \emph{does} have special support for grouped data frames created by \code{\link[=group_by]{group_by()}}. This is typically because the verbs are required to retain the grouping columns, for example: \itemize{ \item \code{\link[=select]{select()}} always retains grouping columns, with a message if any aren't specified in the \code{select()} call. \item \code{\link[=distinct]{distinct()}} and \code{\link[=count]{count()}} place unspecified grouping columns at the front of the data frame before computing their results. \item \code{\link[=arrange]{arrange()}} has a \code{.by_group} argument to optionally order by grouping columns first. } If \code{group_by()} didn't exist, then these verbs would not have special support for grouped data frames. } } dplyr/man/percent_rank.Rd0000644000176200001440000000321614366556340015137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rank.R \name{percent_rank} \alias{percent_rank} \alias{cume_dist} \title{Proportional ranking functions} \usage{ percent_rank(x) cume_dist(x) } \arguments{ \item{x}{A vector to rank By default, the smallest values will get the smallest ranks. 
Use \code{\link[=desc]{desc()}} to reverse the direction so the largest values get the smallest ranks. Missing values will be given rank \code{NA}. Use \code{coalesce(x, Inf)} or \code{coalesce(x, -Inf)} if you want to treat them as the largest or smallest values respectively. To rank by multiple columns at once, supply a data frame.} } \value{ A numeric vector containing a proportion. } \description{ These two ranking functions implement two slightly different ways to compute a percentile. For each \code{x_i} in \code{x}: \itemize{ \item \code{cume_dist(x)} counts the total number of values less than or equal to \code{x_i}, and divides it by the number of observations. \item \code{percent_rank(x)} counts the total number of values less than \code{x_i}, and divides it by the number of observations minus 1. } In both cases, missing values are ignored when counting the number of observations. } \examples{ x <- c(5, 1, 3, 2, 2) cume_dist(x) percent_rank(x) # You can understand what's going on by computing it by hand sapply(x, function(xi) sum(x <= xi) / length(x)) sapply(x, function(xi) sum(x < xi) / (length(x) - 1)) # The real computations are a little more complex in order to # correctly deal with missing values } \seealso{ Other ranking functions: \code{\link{ntile}()}, \code{\link{row_number}()} } \concept{ranking functions} dplyr/man/cumall.Rd0000644000176200001440000000264413663216626013744 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.R \name{cumall} \alias{cumall} \alias{cumany} \alias{cummean} \title{Cumulative versions of any, all, and mean} \usage{ cumall(x) cumany(x) cummean(x) } \arguments{ \item{x}{For \code{cumall()} and \code{cumany()}, a logical vector; for \code{cummean()} an integer or numeric vector.} } \value{ A vector the same length as \code{x}. } \description{ dplyr provides \code{cumall()}, \code{cumany()}, and \code{cummean()} to complete R's set of cumulative functions. 
} \section{Cumulative logical functions}{ These are particularly useful in conjunction with \code{filter()}: \itemize{ \item \code{cumall(x)}: all cases until the first \code{FALSE}. \item \code{cumall(!x)}: all cases until the first \code{TRUE}. \item \code{cumany(x)}: all cases after the first \code{TRUE}. \item \code{cumany(!x)}: all cases after the first \code{FALSE}. } } \examples{ # `cummean()` returns a numeric/integer vector of the same length # as the input vector. x <- c(1, 3, 5, 2, 2) cummean(x) cumsum(x) / seq_along(x) # `cumall()` and `cumany()` return logicals cumall(x < 5) cumany(x == 3) # `cumall()` vs. `cumany()` df <- data.frame( date = as.Date("2020-01-01") + 0:6, balance = c(100, 50, 25, -25, -50, 30, 120) ) # all rows after first overdraft df \%>\% filter(cumany(balance < 0)) # all rows until first overdraft df \%>\% filter(cumall(!(balance < 0))) } dplyr/man/grouped_df.Rd0000644000176200001440000000156014366556340014602 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/grouped-df.R \name{grouped_df} \alias{grouped_df} \alias{is.grouped_df} \alias{is_grouped_df} \title{A grouped data frame.} \usage{ grouped_df(data, vars, drop = group_by_drop_default(data)) is.grouped_df(x) is_grouped_df(x) } \arguments{ \item{data}{a tbl or data frame.} \item{vars}{A character vector.} \item{drop}{When \code{.drop = TRUE}, empty groups are dropped.} } \description{ The easiest way to create a grouped data frame is to call the \code{group_by()} method on a data frame or tbl: this will take care of capturing the unevaluated expressions for you. These functions are designed for programmatic use. For data analysis purposes see \code{\link[=group_data]{group_data()}} for the accessor functions that retrieve various metadata from a grouped data frames. 
} \keyword{internal} dplyr/man/add_rownames.Rd0000644000176200001440000000114014266276767015134 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-tibble.R \name{add_rownames} \alias{add_rownames} \title{Convert row names to an explicit variable.} \usage{ add_rownames(df, var = "rowname") } \arguments{ \item{df}{Input data frame with rownames.} \item{var}{Name of variable to use} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Please use \code{\link[tibble:rownames]{tibble::rownames_to_column()}} instead. } \keyword{internal} dplyr/man/tidyeval-compat.Rd0000644000176200001440000000225414406415372015561 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils-tidy-eval.R \name{tidyeval-compat} \alias{tidyeval-compat} \alias{.data} \alias{expr} \alias{enquo} \alias{enquos} \alias{sym} \alias{syms} \alias{as_label} \alias{quo} \alias{quos} \alias{quo_name} \alias{ensym} \alias{ensyms} \alias{enexpr} \alias{enexprs} \title{Other tidy eval tools} \description{ These tidy eval functions are no longer for normal usage, but are still exported from dplyr for backward compatibility. See \code{\link[rlang:args_data_masking]{?rlang::args_data_masking}} and \code{vignette("programming")} for the latest recommendations. 
\itemize{ \item \link[rlang:expr]{expr()} \item \link[rlang:enquo]{enquo()} \item \link[rlang:enquo]{enquos()} \item \link[rlang:sym]{sym()} \item \link[rlang:sym]{syms()} \item \link[rlang:as_label]{as_label()} \item \link[rlang:defusing-advanced]{quo()} \item \link[rlang:defusing-advanced]{quos()} \item \link[rlang:quo_label]{quo_name()} \item \link[rlang:defusing-advanced]{ensym()} \item \link[rlang:defusing-advanced]{ensyms()} \item \link[rlang:defusing-advanced]{enexpr()} \item \link[rlang:defusing-advanced]{enexprs()} } } \keyword{internal} dplyr/man/vars.Rd0000644000176200001440000000223514366556340013437 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{vars} \alias{vars} \title{Select variables} \usage{ vars(...) } \arguments{ \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Variables to operate on.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{vars()} is superseded because it is only needed for the scoped verbs (i.e. \code{\link[=mutate_at]{mutate_at()}}, \code{\link[=summarise_at]{summarise_at()}}, and friends), which have been been superseded in favour of \code{\link[=across]{across()}}. See \code{vignette("colwise")} for details. This helper is intended to provide tidy-select semantics for scoped verbs like \code{mutate_at()} and \code{summarise_at()}. Note that anywhere you can supply \code{vars()} specification, you can also supply a numeric vector of column positions or a character vector of column names. } \seealso{ \code{\link[=all_vars]{all_vars()}} and \code{\link[=any_vars]{any_vars()}} for other quoting functions that you can use with scoped verbs. 
} dplyr/man/distinct_all.Rd0000644000176200001440000000517014366556340015136 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-distinct.R \name{distinct_all} \alias{distinct_all} \alias{distinct_at} \alias{distinct_if} \title{Select distinct rows by a selection of variables} \usage{ distinct_all(.tbl, .funs = list(), ..., .keep_all = FALSE) distinct_at(.tbl, .vars, .funs = list(), ..., .keep_all = FALSE) distinct_if(.tbl, .predicate, .funs = list(), ..., .keep_all = FALSE) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=pick]{pick()}} or \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. 
These \link{scoped} variants of \code{\link[=distinct]{distinct()}} extract distinct rows by a selection of variables. Like \code{distinct()}, you can modify the variables before ordering with the \code{.funs} argument. } \section{Grouping variables}{ The grouping variables that are part of the selection are taken into account to determine distinct rows. } \examples{ df <- tibble(x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2) distinct_all(df) # -> distinct(df, pick(everything())) distinct_at(df, vars(x,y)) # -> distinct(df, pick(x, y)) distinct_if(df, is.numeric) # -> distinct(df, pick(where(is.numeric))) # You can supply a function that will be applied before extracting the distinct values # The variables of the sorted tibble keep their original values. distinct_all(df, round) # -> distinct(df, across(everything(), round)) } \keyword{internal} dplyr/man/dplyr_data_masking.Rd0000644000176200001440000000045314406402754016312 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils-tidy-eval.R \name{dplyr_data_masking} \alias{dplyr_data_masking} \title{Data-masking} \description{ This page is now located at \code{\link[rlang:args_data_masking]{?rlang::args_data_masking}}. } \keyword{internal} dplyr/man/mutate_all.Rd0000644000176200001440000001554114366556340014617 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-mutate.R \name{mutate_all} \alias{mutate_all} \alias{mutate_if} \alias{mutate_at} \alias{transmute_all} \alias{transmute_if} \alias{transmute_at} \title{Mutate multiple columns} \usage{ mutate_all(.tbl, .funs, ...) mutate_if(.tbl, .predicate, .funs, ...) mutate_at(.tbl, .vars, .funs, ..., .cols = NULL) transmute_all(.tbl, .funs, ...) transmute_if(.tbl, .predicate, .funs, ...) 
transmute_at(.tbl, .vars, .funs, ..., .cols = NULL) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.cols}{This argument has been renamed to \code{.vars} to fit dplyr's terminology and is deprecated.} } \value{ A data frame. By default, the newly created columns have the shortest names needed to uniquely identify the output. To force inclusion of a name, even when not needed, name the input (see examples for details). } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=pick]{pick()}} or \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. The \link{scoped} variants of \code{\link[=mutate]{mutate()}} and \code{\link[=transmute]{transmute()}} make it easy to apply the same transformation to multiple variables. 
There are three variants: \itemize{ \item _all affects every variable \item _at affects variables selected with a character vector or vars() \item _if affects variables selected with a predicate function: } } \section{Grouping variables}{ If applied on a grouped tibble, these operations are \emph{not} applied to the grouping variables. The behaviour depends on whether the selection is \strong{implicit} (\code{all} and \code{if} selections) or \strong{explicit} (\code{at} selections). \itemize{ \item Grouping variables covered by explicit selections in \code{mutate_at()} and \code{transmute_at()} are always an error. Add \code{-group_cols()} to the \code{\link[=vars]{vars()}} selection to avoid this: \if{html}{\out{
}}\preformatted{data \%>\% mutate_at(vars(-group_cols(), ...), myoperation) }\if{html}{\out{
}} Or remove \code{group_vars()} from the character vector of column names: \if{html}{\out{
}}\preformatted{nms <- setdiff(nms, group_vars(data)) data \%>\% mutate_at(nms, myoperation) }\if{html}{\out{
}} \item Grouping variables covered by implicit selections are ignored by \code{mutate_all()}, \code{transmute_all()}, \code{mutate_if()}, and \code{transmute_if()}. } } \section{Naming}{ The names of the new columns are derived from the names of the input variables and the names of the functions. \itemize{ \item if there is only one unnamed function (i.e. if \code{.funs} is an unnamed list of length one), the names of the input variables are used to name the new columns; \item for \verb{_at} functions, if there is only one unnamed variable (i.e., if \code{.vars} is of the form \code{vars(a_single_column)}) and \code{.funs} has length greater than one, the names of the functions are used to name the new columns; \item otherwise, the new names are created by concatenating the names of the input variables and the names of the functions, separated with an underscore \code{"_"}. } The \code{.funs} argument can be a named or unnamed list. If a function is unnamed and the name cannot be derived automatically, a name of the form "fn#" is used. Similarly, \code{\link[=vars]{vars()}} accepts named and unnamed arguments. If a variable in \code{.vars} is named, a new column by that name will be created. Name collisions in the new columns are disambiguated using a unique suffix. } \examples{ iris <- as_tibble(iris) # All variants can be passed functions and additional arguments, # purrr-style. The _at() variants directly support strings. 
Here # we'll scale the variables `height` and `mass`: scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm) starwars \%>\% mutate_at(c("height", "mass"), scale2) # -> starwars \%>\% mutate(across(c("height", "mass"), scale2)) # You can pass additional arguments to the function: starwars \%>\% mutate_at(c("height", "mass"), scale2, na.rm = TRUE) starwars \%>\% mutate_at(c("height", "mass"), ~scale2(., na.rm = TRUE)) # -> starwars \%>\% mutate(across(c("height", "mass"), ~ scale2(.x, na.rm = TRUE))) # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): iris \%>\% mutate_at(vars(matches("Sepal")), log) iris \%>\% mutate(across(matches("Sepal"), log)) # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. Here we divide all the numeric columns by 100: starwars \%>\% mutate_if(is.numeric, scale2, na.rm = TRUE) starwars \%>\% mutate(across(where(is.numeric), ~ scale2(.x, na.rm = TRUE))) # mutate_if() is particularly useful for transforming variables from # one type to another iris \%>\% mutate_if(is.factor, as.character) iris \%>\% mutate_if(is.double, as.integer) # -> iris \%>\% mutate(across(where(is.factor), as.character)) iris \%>\% mutate(across(where(is.double), as.integer)) # Multiple transformations ---------------------------------------- # If you want to apply multiple transformations, pass a list of # functions. When there are multiple functions, they create new # variables instead of modifying the variables in place: iris \%>\% mutate_if(is.numeric, list(scale2, log)) iris \%>\% mutate_if(is.numeric, list(~scale2(.), ~log(.))) iris \%>\% mutate_if(is.numeric, list(scale = scale2, log = log)) # -> iris \%>\% as_tibble() \%>\% mutate(across(where(is.numeric), list(scale = scale2, log = log))) # When there's only one function in the list, it modifies existing # variables in place. 
Give it a name to instead create new variables: iris \%>\% mutate_if(is.numeric, list(scale2)) iris \%>\% mutate_if(is.numeric, list(scale = scale2)) } \seealso{ \link[=scoped]{The other scoped verbs}, \code{\link[=vars]{vars()}} } \keyword{internal} dplyr/man/group_map.Rd0000644000176200001440000001045214366556340014455 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-map.R \name{group_map} \alias{group_map} \alias{group_modify} \alias{group_walk} \title{Apply a function to each group} \usage{ group_map(.data, .f, ..., .keep = FALSE) group_modify(.data, .f, ..., .keep = FALSE) group_walk(.data, .f, ..., .keep = FALSE) } \arguments{ \item{.data}{A grouped tibble} \item{.f}{A function or formula to apply to each group. If a \strong{function}, it is used as is. It should have at least 2 formal arguments. If a \strong{formula}, e.g. \code{~ head(.x)}, it is converted to a function. In the formula, you can use \itemize{ \item \code{.} or \code{.x} to refer to the subset of rows of \code{.tbl} for the given group \item \code{.y} to refer to the key, a one row tibble with one column per grouping variable that identifies the group }} \item{...}{Additional arguments passed on to \code{.f}} \item{.keep}{are the grouping variables kept in \code{.x}} } \value{ \itemize{ \item \code{group_modify()} returns a grouped tibble. In that case \code{.f} must return a data frame. \item \code{group_map()} returns a list of results from calling \code{.f} on each group. \item \code{group_walk()} calls \code{.f} for side effects and returns the input \code{.tbl}, invisibly. } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{group_map()}, \code{group_modify()} and \code{group_walk()} are purrr-style functions that can be used to iterate on grouped tibbles. 
} \details{ Use \code{group_modify()} when \code{summarize()} is too limited, in terms of what you need to do and return for each group. \code{group_modify()} is good for "data frame in, data frame out". If that is too limited, you need to use a \link[=group_nest]{nested} or \link[=group_split]{split} workflow. \code{group_modify()} is an evolution of \code{\link[=do]{do()}}, if you have used that before. Each conceptual group of the data frame is exposed to the function \code{.f} with two pieces of information: \itemize{ \item The subset of the data for the group, exposed as \code{.x}. \item The key, a tibble with exactly one row and columns for each grouping variable, exposed as \code{.y}. } For completeness, \code{group_modify()}, \code{group_map} and \code{group_walk()} also work on ungrouped data frames, in that case the function is applied to the entire data frame (exposed as \code{.x}), and \code{.y} is a one row tibble with no column, consistently with \code{\link[=group_keys]{group_keys()}}. 
} \examples{ # return a list mtcars \%>\% group_by(cyl) \%>\% group_map(~ head(.x, 2L)) # return a tibble grouped by `cyl` with 2 rows per group # the grouping data is recalculated mtcars \%>\% group_by(cyl) \%>\% group_modify(~ head(.x, 2L)) \dontshow{if (requireNamespace("broom", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # a list of tibbles iris \%>\% group_by(Species) \%>\% group_map(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) # a restructured grouped tibble iris \%>\% group_by(Species) \%>\% group_modify(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))) \dontshow{\}) # examplesIf} # a list of vectors iris \%>\% group_by(Species) \%>\% group_map(~ quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75))) # to use group_modify() the lambda must return a data frame iris \%>\% group_by(Species) \%>\% group_modify(~ { quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)) \%>\% tibble::enframe(name = "prob", value = "quantile") }) iris \%>\% group_by(Species) \%>\% group_modify(~ { .x \%>\% purrr::map_dfc(fivenum) \%>\% mutate(nms = c("min", "Q1", "median", "Q3", "max")) }) # group_walk() is for side effects dir.create(temp <- tempfile()) iris \%>\% group_by(Species) \%>\% group_walk(~ write.csv(.x, file = file.path(temp, paste0(.y$Species, ".csv")))) list.files(temp, pattern = "csv$") unlink(temp, recursive = TRUE) # group_modify() and ungrouped data frames mtcars \%>\% group_modify(~ head(.x, 2L)) } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_nest}()}, \code{\link{group_split}()}, \code{\link{group_trim}()} } \concept{grouping functions} dplyr/man/rename.Rd0000644000176200001440000000562614406402754013734 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rename.R \name{rename} \alias{rename} \alias{rename_with} \title{Rename columns} \usage{ rename(.data, ...) rename_with(.data, .fn, .cols = everything(), ...) 
} \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{For \code{rename()}: <\code{\link[=dplyr_tidy_select]{tidy-select}}> Use \code{new_name = old_name} to rename selected variables. For \code{rename_with()}: additional arguments passed onto \code{.fn}.} \item{.fn}{A function used to transform the selected \code{.cols}. Should return a character vector the same length as the input.} \item{.cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to rename; defaults to all columns.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item Column names are changed; column order is preserved. \item Data frame attributes are preserved. \item Groups are updated to reflect new names. } } \description{ \code{rename()} changes the names of individual variables using \code{new_name = old_name} syntax; \code{rename_with()} renames columns using a function. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}. 
} \examples{ iris <- as_tibble(iris) # so it prints a little nicer rename(iris, petal_length = Petal.Length) # Rename using a named vector and `all_of()` lookup <- c(pl = "Petal.Length", sl = "Sepal.Length") rename(iris, all_of(lookup)) # If your named vector might contain names that don't exist in the data, # use `any_of()` instead lookup <- c(lookup, new = "unknown") try(rename(iris, all_of(lookup))) rename(iris, any_of(lookup)) rename_with(iris, toupper) rename_with(iris, toupper, starts_with("Petal")) rename_with(iris, ~ tolower(gsub(".", "_", .x, fixed = TRUE))) \dontshow{if (getRversion() > "4.0.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # If your renaming function uses `paste0()`, make sure to set # `recycle0 = TRUE` to ensure that empty selections are recycled correctly try(rename_with( iris, ~ paste0("prefix_", .x), starts_with("nonexistent") )) rename_with( iris, ~ paste0("prefix_", .x, recycle0 = TRUE), starts_with("nonexistent") ) \dontshow{\}) # examplesIf} } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{reframe}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/args_by.Rd0000644000176200001440000000133314366556340014110 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/by.R \name{args_by} \alias{args_by} \title{Helper for consistent documentation of \code{.by}} \arguments{ \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to group by for just this operation, functioning as an alternative to \code{\link[=group_by]{group_by()}}. 
For details and examples, see \link[=dplyr_by]{?dplyr_by}.} } \description{ Use \verb{@inheritParams args_by} to consistently document \code{.by}. } \keyword{internal} dplyr/man/sql.Rd0000644000176200001440000000070614272553254013261 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compat-dbplyr.R \name{sql} \alias{sql} \title{SQL escaping.} \usage{ sql(...) } \arguments{ \item{...}{Character vectors that will be combined into a single SQL expression.} } \description{ These functions are critical when writing functions that translate R functions to sql functions. Typically a conversion function should escape all its inputs and return an sql object. } dplyr/man/sample_n.Rd0000644000176200001440000000550414366556340014264 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sample.R \name{sample_n} \alias{sample_n} \alias{sample_frac} \title{Sample n rows from a table} \usage{ sample_n(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) sample_frac(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) } \arguments{ \item{tbl}{A data.frame.} \item{size}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> For \code{sample_n()}, the number of rows to select. For \code{sample_frac()}, the fraction of rows to select. If \code{tbl} is grouped, \code{size} applies to each group.} \item{replace}{Sample with or without replacement?} \item{weight}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. 
Weights are automatically standardised to sum to 1.} \item{.env}{DEPRECATED.} \item{...}{ignored} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{sample_n()} and \code{sample_frac()} have been superseded in favour of \code{\link[=slice_sample]{slice_sample()}}. While they will not be deprecated in the near future, retirement means that we will only perform critical bug fixes, so we recommend moving to the newer alternative. These functions were superseded because we realised it was more convenient to have two mutually exclusive arguments to one function, rather than two separate functions. This also made it possible to clean up a few other smaller design issues with \code{sample_n()}/\code{sample_frac()}: \itemize{ \item The connection to \code{slice()} was not obvious. \item The name of the first argument, \code{tbl}, is inconsistent with other single table verbs which use \code{.data}. \item The \code{size} argument uses tidy evaluation, which is surprising and undocumented. \item It was easier to remove the deprecated \code{.env} argument. \item \code{...} was in a suboptimal position. 
} } \examples{ df <- tibble(x = 1:5, w = c(0.1, 0.1, 0.1, 2, 2)) # sample_n() -> slice_sample() ---------------------------------------------- # Was: sample_n(df, 3) sample_n(df, 10, replace = TRUE) sample_n(df, 3, weight = w) # Now: slice_sample(df, n = 3) slice_sample(df, n = 10, replace = TRUE) slice_sample(df, n = 3, weight_by = w) # Note that sample_n() would error if n was bigger than the group size # slice_sample() will just use the available rows for consistency with # the other slice helpers like slice_head() try(sample_n(df, 10)) slice_sample(df, n = 10) # sample_frac() -> slice_sample() ------------------------------------------- # Was: sample_frac(df, 0.25) sample_frac(df, 2, replace = TRUE) # Now: slice_sample(df, prop = 0.25) slice_sample(df, prop = 2, replace = TRUE) } \keyword{internal} dplyr/man/mutate.Rd0000644000176200001440000001676614406402754013773 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mutate.R \name{mutate} \alias{mutate} \alias{mutate.data.frame} \title{Create, modify, and delete columns} \usage{ mutate(.data, ...) \method{mutate}{data.frame}( .data, ..., .by = NULL, .keep = c("all", "used", "unused", "none"), .before = NULL, .after = NULL ) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. The name gives the name of the column in the output. The value can be: \itemize{ \item A vector of length 1, which will be recycled to the correct length. \item A vector the same length as the current group (or the whole data frame if ungrouped). \item \code{NULL}, to remove the column. \item A data frame or tibble, to create multiple columns in the output. 
}} \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to group by for just this operation, functioning as an alternative to \code{\link[=group_by]{group_by()}}. For details and examples, see \link[=dplyr_by]{?dplyr_by}.} \item{.keep}{Control which columns from \code{.data} are retained in the output. Grouping columns and columns created by \code{...} are always kept. \itemize{ \item \code{"all"} retains all columns from \code{.data}. This is the default. \item \code{"used"} retains only the columns used in \code{...} to create new columns. This is useful for checking your work, as it displays inputs and outputs side-by-side. \item \code{"unused"} retains only the columns \emph{not} used in \code{...} to create new columns. This is useful if you generate new columns, but no longer need the columns used to generate them. \item \code{"none"} doesn't retain any extra columns from \code{.data}. Only the grouping variables and columns created by \code{...} are kept. }} \item{.before, .after}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns should appear (the default is to add to the right hand side). See \code{\link[=relocate]{relocate()}} for more details.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Columns from \code{.data} will be preserved according to the \code{.keep} argument. \item Existing columns that are modified by \code{...} will always be returned in their original location. \item New columns created through \code{...} will be placed according to the \code{.before} and \code{.after} arguments. \item The number of rows is not affected. \item Columns given the value \code{NULL} will be removed. 
\item Groups will be recomputed if a grouping variable is mutated. \item Data frame attributes are preserved. } } \description{ \code{mutate()} creates new columns that are functions of existing variables. It can also modify (if the name is the same as an existing column) and delete columns (by setting their value to \code{NULL}). } \section{Useful mutate functions}{ \itemize{ \item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings \item \code{\link[=lead]{lead()}}, \code{\link[=lag]{lag()}} \item \code{\link[=dense_rank]{dense_rank()}}, \code{\link[=min_rank]{min_rank()}}, \code{\link[=percent_rank]{percent_rank()}}, \code{\link[=row_number]{row_number()}}, \code{\link[=cume_dist]{cume_dist()}}, \code{\link[=ntile]{ntile()}} \item \code{\link[=cumsum]{cumsum()}}, \code{\link[=cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[=cumany]{cumany()}}, \code{\link[=cumall]{cumall()}} \item \code{\link[=na_if]{na_if()}}, \code{\link[=coalesce]{coalesce()}} \item \code{\link[=if_else]{if_else()}}, \code{\link[=recode]{recode()}}, \code{\link[=case_when]{case_when()}} } } \section{Grouped tibbles}{ Because mutating expressions are computed within groups, they may yield different results on grouped tibbles. This will be the case as soon as an aggregating, lagging, or ranking function is involved. Compare this ungrouped mutate: \if{html}{\out{
}}\preformatted{starwars \%>\% select(name, mass, species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) }\if{html}{\out{
}} With the grouped equivalent: \if{html}{\out{
}}\preformatted{starwars \%>\% select(name, mass, species) \%>\% group_by(species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) }\if{html}{\out{
}} The former normalises \code{mass} by the global average whereas the latter normalises by the averages within species levels. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. } \examples{ # Newly created variables are available immediately starwars \%>\% select(name, mass) \%>\% mutate( mass2 = mass * 2, mass2_squared = mass2 * mass2 ) # As well as adding new variables, you can use mutate() to # remove variables and modify existing variables. starwars \%>\% select(name, height, mass, homeworld) \%>\% mutate( mass = NULL, height = height * 0.0328084 # convert to feet ) # Use across() with mutate() to apply a transformation # to multiple columns in a tibble. starwars \%>\% select(name, homeworld, species) \%>\% mutate(across(!name, as.factor)) # see more in ?across # Window functions are useful for grouped mutates: starwars \%>\% select(name, mass, homeworld) \%>\% group_by(homeworld) \%>\% mutate(rank = min_rank(desc(mass))) # see `vignette("window-functions")` for more details # By default, new columns are placed on the far right. df <- tibble(x = 1, y = 2) df \%>\% mutate(z = x + y) df \%>\% mutate(z = x + y, .before = 1) df \%>\% mutate(z = x + y, .after = x) # By default, mutate() keeps all columns from the input data. df <- tibble(x = 1, y = 2, a = "a", b = "b") df \%>\% mutate(z = x + y, .keep = "all") # the default df \%>\% mutate(z = x + y, .keep = "used") df \%>\% mutate(z = x + y, .keep = "unused") df \%>\% mutate(z = x + y, .keep = "none") # Grouping ---------------------------------------- # The mutate operation may yield different results on grouped # tibbles because the expressions are computed within groups. 
# The following normalises `mass` by the global average: starwars \%>\% select(name, mass, species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) # Whereas this normalises `mass` by the averages within species # levels: starwars \%>\% select(name, mass, species) \%>\% group_by(species) \%>\% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) # Indirection ---------------------------------------- # Refer to column names stored as strings with the `.data` pronoun: vars <- c("mass", "height") mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]]) # Learn more in ?rlang::args_data_masking } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{reframe}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/na_if.Rd0000644000176200001440000000332114525503021013516 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/na-if.R \name{na_if} \alias{na_if} \title{Convert values to \code{NA}} \usage{ na_if(x, y) } \arguments{ \item{x}{Vector to modify} \item{y}{Value or vector to compare against. When \code{x} and \code{y} are equal, the value in \code{x} will be replaced with \code{NA}. \code{y} is \link[vctrs:theory-faq-coercion]{cast} to the type of \code{x} before comparison. \code{y} is \link[vctrs:theory-faq-recycling]{recycled} to the size of \code{x} before comparison. This means that \code{y} can be a vector with the same size as \code{x}, but most of the time this will be a single value.} } \value{ A modified version of \code{x} that replaces any values that are equal to \code{y} with \code{NA}. } \description{ This is a translation of the SQL command \code{NULLIF}. It is useful if you want to convert an annoying value to \code{NA}. 
} \examples{ na_if(1:5, 5:1) x <- c(1, -1, 0, 10) 100 / x 100 / na_if(x, 0) y <- c("abc", "def", "", "ghi") na_if(y, "") # `na_if()` allows you to replace `NaN` with `NA`, # even though `NaN == NaN` returns `NA` z <- c(1, NaN, NA, 2, NaN) na_if(z, NaN) # `na_if()` is particularly useful inside `mutate()`, # and is meant for use with vectors rather than entire data frames starwars \%>\% select(name, eye_color) \%>\% mutate(eye_color = na_if(eye_color, "unknown")) # `na_if()` can also be used with `mutate()` and `across()` # to alter multiple columns starwars \%>\% mutate(across(where(is.character), ~na_if(., "unknown"))) } \seealso{ \code{\link[=coalesce]{coalesce()}} to replace missing values with a specified value. \code{\link[tidyr:replace_na]{tidyr::replace_na()}} to replace \code{NA} with a value. } dplyr/man/check_dbplyr.Rd0000644000176200001440000000157714266276767015130 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compat-dbplyr.R \name{check_dbplyr} \alias{check_dbplyr} \alias{wrap_dbplyr_obj} \title{dbplyr compatibility functions} \usage{ check_dbplyr() wrap_dbplyr_obj(obj_name) } \description{ In dplyr 0.7.0, a number of database and SQL functions moved from dplyr to dbplyr. The generic functions stayed in dplyr (since there is no easy way to conditionally import a generic from different packages), but many other SQL and database helper functions moved. If you have written a backend, these functions generate the code you need to work with both dplyr 0.5.0 and dplyr 0.7.0. 
} \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} wrap_dbplyr_obj("build_sql") wrap_dbplyr_obj("base_agg") \dontshow{\}) # examplesIf} } \keyword{internal} dplyr/man/dim_desc.Rd0000644000176200001440000000064514366556340014236 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils-format.R \name{dim_desc} \alias{dim_desc} \title{Describing dimensions} \usage{ dim_desc(x) } \arguments{ \item{x}{Object to show dimensions for.} } \description{ Prints the dimensions of an array-like object in a user-friendly manner, substituting \code{NA} with ?? (for SQL queries). } \examples{ dim_desc(mtcars) } \keyword{internal} dplyr/man/nest_join.Rd0000644000176200001440000001353414366556340014460 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join.R \name{nest_join} \alias{nest_join} \alias{nest_join.data.frame} \title{Nest join} \usage{ nest_join(x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ...) \method{nest_join}{data.frame}( x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ..., na_matches = c("na", "never"), unmatched = "drop" ) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A join specification created with \code{\link[=join_by]{join_by()}}, or a character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join on different variables between \code{x} and \code{y}, use a \code{\link[=join_by]{join_by()}} specification. 
For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a \code{\link[=join_by]{join_by()}} specification with multiple expressions. For example, \code{join_by(a == b, c == d)} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between \code{x} and \code{y}, you can shorten this by listing only the variable names, like \code{join_by(a, c)}. \code{\link[=join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap joins. See the documentation at \link[=join_by]{?join_by} for details on these types of joins. For simple equality joins, you can alternatively specify a character vector of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, see \code{\link[=cross_join]{cross_join()}}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{keep}{Should the new list-column contain join keys? The default will preserve the join keys for inequality joins.} \item{name}{The name of the list-column created by the join. If \code{NULL}, the default, the name of \code{y} is used.} \item{...}{Other parameters passed onto methods.} \item{na_matches}{Should two \code{NA} or two \code{NaN} values match? \itemize{ \item \code{"na"}, the default, treats two \code{NA} or two \code{NaN} values as equal, like \code{\%in\%}, \code{\link[=match]{match()}}, and \code{\link[=merge]{merge()}}. 
\item \code{"never"} treats two \code{NA} or two \code{NaN} values as different, and will never match them together or to any other values. This is similar to joins for database sources and to \code{base::merge(incomparables = NA)}. }} \item{unmatched}{How should unmatched keys that would result in dropped rows be handled? \itemize{ \item \code{"drop"} drops unmatched keys from the result. \item \code{"error"} throws an error if unmatched keys are detected. } \code{unmatched} is intended to protect you from accidentally dropping rows during a join. It only checks for unmatched keys in the input that could potentially drop rows. \itemize{ \item For left joins, it checks \code{y}. \item For right joins, it checks \code{x}. \item For inner joins, it checks both \code{x} and \code{y}. In this case, \code{unmatched} is also allowed to be a character vector of length 2 to specify the behavior for \code{x} and \code{y} independently. }} } \value{ The output: \itemize{ \item Is same type as \code{x} (including having the same groups). \item Has exactly the same number of rows as \code{x}. \item Contains all the columns of \code{x} in the same order with the same values. They are only modified (slightly) if \code{keep = FALSE}, when columns listed in \code{by} will be coerced to their common type across \code{x} and \code{y}. \item Gains one new column called \code{{name}} on the far right, a list column containing data frames the same type as \code{y}. } } \description{ A nest join leaves \code{x} almost unchanged, except that it adds a new list-column, where each element contains the rows from \code{y} that match the corresponding row in \code{x}. } \section{Relationship to other joins}{ You can recreate many other joins from the result of a nest join: \itemize{ \item \code{\link[=inner_join]{inner_join()}} is a \code{nest_join()} plus \code{\link[tidyr:unnest]{tidyr::unnest()}}. 
\item \code{\link[=left_join]{left_join()}} is a \code{nest_join()} plus \code{tidyr::unnest(keep_empty = TRUE)}. \item \code{\link[=semi_join]{semi_join()}} is a \code{nest_join()} plus a \code{filter()} where you check that every element of data has at least one row. \item \code{\link[=anti_join]{anti_join()}} is a \code{nest_join()} plus a \code{filter()} where you check that every element has zero rows. } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_join")}. } \examples{ df1 <- tibble(x = 1:3) df2 <- tibble(x = c(2, 3, 3), y = c("a", "b", "c")) out <- nest_join(df1, df2) out out$df2 } \seealso{ Other joins: \code{\link{cross_join}()}, \code{\link{filter-joins}}, \code{\link{mutate-joins}} } \concept{joins} dplyr/man/pull.Rd0000644000176200001440000000407614266276767013460 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pull.R \name{pull} \alias{pull} \title{Extract a single column} \usage{ pull(.data, var = -1, name = NULL, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{var}{A variable specified as: \itemize{ \item a literal variable name \item a positive integer, giving the position counting from the left \item a negative integer, giving the position counting from the right. } The default returns the last column (on the assumption that's the column you've created most recently). 
This argument is taken by expression and supports \link[rlang:topic-inject]{quasiquotation} (you can unquote column names and column locations).} \item{name}{An optional parameter that specifies the column to be used as names for a named vector. Specified in a similar manner as \code{var}.} \item{...}{For use by methods.} } \value{ A vector the same size as \code{.data}. } \description{ \code{pull()} is similar to \code{$}. It's mostly useful because it looks a little nicer in pipes, it also works with remote data frames, and it can optionally name the output. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}. } \examples{ mtcars \%>\% pull(-1) mtcars \%>\% pull(1) mtcars \%>\% pull(cyl) \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # Also works for remote sources df <- dbplyr::memdb_frame(x = 1:10, y = 10:1, .name = "pull-ex") df \%>\% mutate(z = x * y) \%>\% pull() \dontshow{\}) # examplesIf} # Pull a named vector starwars \%>\% pull(height, name) } dplyr/man/filter-joins.Rd0000644000176200001440000001115014366556340015065 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join.R \name{filter-joins} \alias{filter-joins} \alias{semi_join} \alias{semi_join.data.frame} \alias{anti_join} \alias{anti_join.data.frame} \title{Filtering joins} \usage{ semi_join(x, y, by = NULL, copy = FALSE, ...) \method{semi_join}{data.frame}(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never")) anti_join(x, y, by = NULL, copy = FALSE, ...) 
\method{anti_join}{data.frame}(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never")) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A join specification created with \code{\link[=join_by]{join_by()}}, or a character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join on different variables between \code{x} and \code{y}, use a \code{\link[=join_by]{join_by()}} specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a \code{\link[=join_by]{join_by()}} specification with multiple expressions. For example, \code{join_by(a == b, c == d)} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between \code{x} and \code{y}, you can shorten this by listing only the variable names, like \code{join_by(a, c)}. \code{\link[=join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap joins. See the documentation at \link[=join_by]{?join_by} for details on these types of joins. For simple equality joins, you can alternatively specify a character vector of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. 
To perform a cross-join, generating all combinations of \code{x} and \code{y}, see \code{\link[=cross_join]{cross_join()}}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{...}{Other parameters passed onto methods.} \item{na_matches}{Should two \code{NA} or two \code{NaN} values match? \itemize{ \item \code{"na"}, the default, treats two \code{NA} or two \code{NaN} values as equal, like \code{\%in\%}, \code{\link[=match]{match()}}, and \code{\link[=merge]{merge()}}. \item \code{"never"} treats two \code{NA} or two \code{NaN} values as different, and will never match them together or to any other values. This is similar to joins for database sources and to \code{base::merge(incomparables = NA)}. }} } \value{ An object of the same type as \code{x}. The output has the following properties: \itemize{ \item Rows are a subset of the input, but appear in the same order. \item Columns are not modified. \item Data frame attributes are preserved. \item Groups are taken from \code{x}. The number of groups may be reduced. } } \description{ Filtering joins filter rows from \code{x} based on the presence or absence of matches in \code{y}: \itemize{ \item \code{semi_join()} returns all rows from \code{x} with a match in \code{y}. \item \code{anti_join()} returns all rows from \code{x} with\strong{out} a match in \code{y}. } } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{semi_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("semi_join")}. 
\item \code{anti_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("anti_join")}. } } \examples{ # "Filtering" joins keep cases from the LHS band_members \%>\% semi_join(band_instruments) band_members \%>\% anti_join(band_instruments) # To suppress the message about joining variables, supply `by` band_members \%>\% semi_join(band_instruments, by = join_by(name)) # This is good practice in production code } \seealso{ Other joins: \code{\link{cross_join}()}, \code{\link{mutate-joins}}, \code{\link{nest_join}()} } \concept{joins} dplyr/man/group_by_all.Rd0000644000176200001440000000612214366556340015141 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-group-by.R \name{group_by_all} \alias{group_by_all} \alias{group_by_at} \alias{group_by_if} \title{Group by a selection of variables} \usage{ group_by_all( .tbl, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_at( .tbl, .vars, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) group_by_if( .tbl, .predicate, .funs = list(), ..., .add = FALSE, .drop = group_by_drop_default(.tbl) ) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.add}{See \code{\link[=group_by]{group_by()}}} \item{.drop}{Drop groups formed by factor levels that don't appear in the data? The default is \code{TRUE} except when \code{.data} has been previously grouped with \code{.drop = FALSE}. 
See \code{\link[=group_by_drop_default]{group_by_drop_default()}} for details.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=pick]{pick()}} or \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These \link{scoped} variants of \code{\link[=group_by]{group_by()}} group a data frame by a selection of variables. Like \code{\link[=group_by]{group_by()}}, they have optional \link{mutate} semantics. } \section{Grouping variables}{ Existing grouping variables are maintained, even if not included in the selection. } \examples{ # Group a data frame by all variables: group_by_all(mtcars) # -> mtcars \%>\% group_by(pick(everything())) # Group by variables selected with a predicate: group_by_if(iris, is.factor) # -> iris \%>\% group_by(pick(where(is.factor))) # Group by variables selected by name: group_by_at(mtcars, vars(vs, am)) # -> mtcars \%>\% group_by(pick(vs, am)) # Like group_by(), the scoped variants have optional mutate # semantics. 
This provides a shortcut for group_by() + mutate(): d <- tibble(x=c(1,1,2,2), y=c(1,2,1,2)) group_by_all(d, as.factor) # -> d \%>\% group_by(across(everything(), as.factor)) group_by_if(iris, is.factor, as.character) # -> iris \%>\% group_by(across(where(is.factor), as.character)) } \keyword{internal} dplyr/man/recode.Rd0000644000176200001440000001201414366556340013721 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R \name{recode} \alias{recode} \alias{recode_factor} \title{Recode values} \usage{ recode(.x, ..., .default = NULL, .missing = NULL) recode_factor(.x, ..., .default = NULL, .missing = NULL, .ordered = FALSE) } \arguments{ \item{.x}{A vector to modify} \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Replacements. For character and factor \code{.x}, these should be named and replacement is based only on their name. For numeric \code{.x}, these can be named or not. If not named, the replacement is done based on position i.e. \code{.x} represents positions to look for in replacements. See examples. When named, the argument names should be the current values to be replaced, and the argument values should be the new (replacement) values. All replacements must be the same type, and must have either length one or the same length as \code{.x}.} \item{.default}{If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in \code{.x}, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with \code{NA}. \code{.default} must be either length 1 or the same length as \code{.x}.} \item{.missing}{If supplied, any missing values in \code{.x} will be replaced by this value. 
Must be either length 1 or the same length as \code{.x}.} \item{.ordered}{If \code{TRUE}, \code{recode_factor()} creates an ordered factor.} } \value{ A vector the same length as \code{.x}, and the same type as the first of \code{...}, \code{.default}, or \code{.missing}. \code{recode_factor()} returns a factor whose levels are in the same order as in \code{...}. The levels in \code{.default} and \code{.missing} come last. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{recode()} is superseded in favor of \code{\link[=case_match]{case_match()}}, which handles the most important cases of \code{recode()} with a more elegant interface. \code{recode_factor()} is also superseded, however, its direct replacement is not currently available but will eventually live in \href{https://forcats.tidyverse.org/}{forcats}. For creating new variables based on logical vectors, use \code{\link[=if_else]{if_else()}}. For even more complicated criteria, use \code{\link[=case_when]{case_when()}}. \code{recode()} is a vectorised version of \code{\link[=switch]{switch()}}: you can replace numeric values based on their position or their name, and character or factor values only by their name. This is an S3 generic: dplyr provides methods for numeric, character, and factors. You can use \code{recode()} directly with factors; it will preserve the existing order of levels while changing the values. Alternatively, you can use \code{recode_factor()}, which will change the order of levels to match the order of replacements. 
} \examples{ char_vec <- sample(c("a", "b", "c"), 10, replace = TRUE) # `recode()` is superseded by `case_match()` recode(char_vec, a = "Apple", b = "Banana") case_match(char_vec, "a" ~ "Apple", "b" ~ "Banana", .default = char_vec) # With `case_match()`, you don't need typed missings like `NA_character_` recode(char_vec, a = "Apple", b = "Banana", .default = NA_character_) case_match(char_vec, "a" ~ "Apple", "b" ~ "Banana", .default = NA) # Throws an error as `NA` is logical, not character. try(recode(char_vec, a = "Apple", b = "Banana", .default = NA)) # `case_match()` is easier to use with numeric vectors, because you don't # need to turn the numeric values into names num_vec <- c(1:4, NA) recode(num_vec, `2` = 20L, `4` = 40L) case_match(num_vec, 2 ~ 20, 4 ~ 40, .default = num_vec) # `case_match()` doesn't have the ability to match by position like # `recode()` does with numeric vectors recode(num_vec, "a", "b", "c", "d") recode(c(1,5,3), "a", "b", "c", "d", .default = "nothing") # For `case_match()`, incompatible types are an error rather than a warning recode(num_vec, `2` = "b", `4` = "d") try(case_match(num_vec, 2 ~ "b", 4 ~ "d", .default = num_vec)) # The factor method of `recode()` can generally be replaced with # `forcats::fct_recode()` factor_vec <- factor(c("a", "b", "c")) recode(factor_vec, a = "Apple") # `recode_factor()` does not currently have a direct replacement, but we # plan to add one to forcats. In the meantime, you can use the `.ptype` # argument to `case_match()`. recode_factor( num_vec, `1` = "z", `2` = "y", `3` = "x", .default = "D", .missing = "M" ) case_match( num_vec, 1 ~ "z", 2 ~ "y", 3 ~ "x", NA ~ "M", .default = "D", .ptype = factor(levels = c("z", "y", "x", "D", "M")) ) } \seealso{ \code{\link[=na_if]{na_if()}} to replace specified values with a \code{NA}. \code{\link[=coalesce]{coalesce()}} to replace missing values with a specified value. \code{\link[tidyr:replace_na]{tidyr::replace_na()}} to replace \code{NA} with a value. 
} dplyr/man/transmute.Rd0000644000176200001440000000377714406402754014514 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/transmute.R \name{transmute} \alias{transmute} \title{Create, modify, and delete columns} \usage{ transmute(.data, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. The name gives the name of the column in the output. The value can be: \itemize{ \item A vector of length 1, which will be recycled to the correct length. \item A vector the same length as the current group (or the whole data frame if ungrouped). \item \code{NULL}, to remove the column. \item A data frame or tibble, to create multiple columns in the output. }} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Columns created or modified through \code{...} will be returned in the order specified by \code{...}. \item Unmodified grouping columns will be placed at the front. \item The number of rows is not affected. \item Columns given the value \code{NULL} will be removed. \item Groups will be recomputed if a grouping variable is mutated. \item Data frame attributes are preserved. } } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{transmute()} creates a new data frame containing only the specified computations. It's superseded because you can perform the same job with \code{mutate(.keep = "none")}. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. 
Methods available in currently loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("transmute")}. } \keyword{internal} dplyr/man/mutate-joins.Rd0000644000176200001440000003347714406402754015111 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join.R \name{mutate-joins} \alias{mutate-joins} \alias{join} \alias{join.data.frame} \alias{inner_join} \alias{inner_join.data.frame} \alias{left_join} \alias{left_join.data.frame} \alias{right_join} \alias{right_join.data.frame} \alias{full_join} \alias{full_join.data.frame} \title{Mutating joins} \usage{ inner_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL ) \method{inner_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", unmatched = "drop", relationship = NULL ) left_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL ) \method{left_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", unmatched = "drop", relationship = NULL ) right_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL ) \method{right_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", unmatched = "drop", relationship = NULL ) full_join( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL ) \method{full_join}{data.frame}( x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", relationship = NULL ) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). 
See \emph{Methods}, below, for more details.} \item{by}{A join specification created with \code{\link[=join_by]{join_by()}}, or a character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join on different variables between \code{x} and \code{y}, use a \code{\link[=join_by]{join_by()}} specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a \code{\link[=join_by]{join_by()}} specification with multiple expressions. For example, \code{join_by(a == b, c == d)} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between \code{x} and \code{y}, you can shorten this by listing only the variable names, like \code{join_by(a, c)}. \code{\link[=join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap joins. See the documentation at \link[=join_by]{?join_by} for details on these types of joins. For simple equality joins, you can alternatively specify a character vector of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, see \code{\link[=cross_join]{cross_join()}}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. 
This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} \item{...}{Other parameters passed onto methods.} \item{keep}{Should the join keys from both \code{x} and \code{y} be preserved in the output? \itemize{ \item If \code{NULL}, the default, joins on equality retain only the keys from \code{x}, while joins on inequality retain the keys from both inputs. \item If \code{TRUE}, all keys from both inputs are retained. \item If \code{FALSE}, only keys from \code{x} are retained. For right and full joins, the data in key columns corresponding to rows that only exist in \code{y} are merged into the key columns from \code{x}. Can't be used when joining on inequality conditions. }} \item{na_matches}{Should two \code{NA} or two \code{NaN} values match? \itemize{ \item \code{"na"}, the default, treats two \code{NA} or two \code{NaN} values as equal, like \code{\%in\%}, \code{\link[=match]{match()}}, and \code{\link[=merge]{merge()}}. \item \code{"never"} treats two \code{NA} or two \code{NaN} values as different, and will never match them together or to any other values. This is similar to joins for database sources and to \code{base::merge(incomparables = NA)}. }} \item{multiple}{Handling of rows in \code{x} with multiple matches in \code{y}. For each row of \code{x}: \itemize{ \item \code{"all"}, the default, returns every match detected in \code{y}. This is the same behavior as SQL. \item \code{"any"} returns one match detected in \code{y}, with no guarantees on which match will be returned. It is often faster than \code{"first"} and \code{"last"} if you just need to detect if there is at least one match. \item \code{"first"} returns the first match detected in \code{y}. 
\item \code{"last"} returns the last match detected in \code{y}. }} \item{unmatched}{How should unmatched keys that would result in dropped rows be handled? \itemize{ \item \code{"drop"} drops unmatched keys from the result. \item \code{"error"} throws an error if unmatched keys are detected. } \code{unmatched} is intended to protect you from accidentally dropping rows during a join. It only checks for unmatched keys in the input that could potentially drop rows. \itemize{ \item For left joins, it checks \code{y}. \item For right joins, it checks \code{x}. \item For inner joins, it checks both \code{x} and \code{y}. In this case, \code{unmatched} is also allowed to be a character vector of length 2 to specify the behavior for \code{x} and \code{y} independently. }} \item{relationship}{Handling of the expected relationship between the keys of \code{x} and \code{y}. If the expectations chosen from the list below are invalidated, an error is thrown. \itemize{ \item \code{NULL}, the default, doesn't expect there to be any relationship between \code{x} and \code{y}. However, for equality joins it will check for a many-to-many relationship (which is typically unexpected) and will warn if one occurs, encouraging you to either take a closer look at your inputs or make this relationship explicit by specifying \code{"many-to-many"}. See the \emph{Many-to-many relationships} section for more details. \item \code{"one-to-one"} expects: \itemize{ \item Each row in \code{x} matches at most 1 row in \code{y}. \item Each row in \code{y} matches at most 1 row in \code{x}. } \item \code{"one-to-many"} expects: \itemize{ \item Each row in \code{y} matches at most 1 row in \code{x}. } \item \code{"many-to-one"} expects: \itemize{ \item Each row in \code{x} matches at most 1 row in \code{y}. } \item \code{"many-to-many"} doesn't perform any relationship checks, but is provided to allow you to be explicit about this relationship if you know it exists. 
} \code{relationship} doesn't handle cases where there are zero matches. For that, see \code{unmatched}.} } \value{ An object of the same type as \code{x} (including the same groups). The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item The rows are affected by the join type. \itemize{ \item \code{inner_join()} returns matched \code{x} rows. \item \code{left_join()} returns all \code{x} rows. \item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. } \item Output columns include all columns from \code{x} and all non-key columns from \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate these as well. \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their common type between \code{x} and \code{y}. } } \description{ Mutating joins add columns from \code{y} to \code{x}, matching observations based on the keys. There are four mutating joins: the inner join, and the three outer joins. \subsection{Inner join}{ An \code{inner_join()} only keeps observations from \code{x} that have a matching key in \code{y}. The most important property of an inner join is that unmatched rows in either input are not included in the result. This means that generally inner joins are not appropriate in most analyses, because it is too easy to lose observations. } \subsection{Outer joins}{ The three outer joins keep observations that appear in at least one of the data frames: \itemize{ \item A \code{left_join()} keeps all observations in \code{x}. 
\item A \code{right_join()} keeps all observations in \code{y}. \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. } } } \section{Many-to-many relationships}{ By default, dplyr guards against many-to-many relationships in equality joins by throwing a warning. These occur when both of the following are true: \itemize{ \item A row in \code{x} matches multiple rows in \code{y}. \item A row in \code{y} matches multiple rows in \code{x}. } This is typically surprising, as most joins involve a relationship of one-to-one, one-to-many, or many-to-one, and is often the result of an improperly specified join. Many-to-many relationships are particularly problematic because they can result in a Cartesian explosion of the number of rows returned from the join. If a many-to-many relationship is expected, silence this warning by explicitly setting \code{relationship = "many-to-many"}. In production code, it is best to preemptively set \code{relationship} to whatever relationship you expect to exist between the keys of \code{x} and \code{y}, as this forces an error to occur immediately if the data doesn't align with your expectations. Inequality joins typically result in many-to-many relationships by nature, so they don't warn on them by default, but you should still take extra care when specifying an inequality join, because they also have the capability to return a large number of rows. Rolling joins don't warn on many-to-many relationships either, but many rolling joins follow a many-to-one relationship, so it is often useful to set \code{relationship = "many-to-one"} to enforce this. Note that in SQL, most database providers won't let you specify a many-to-many relationship between two tables, instead requiring that you create a third \emph{junction table} that results in two one-to-many relationships instead. } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. 
See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. } } \examples{ band_members \%>\% inner_join(band_instruments) band_members \%>\% left_join(band_instruments) band_members \%>\% right_join(band_instruments) band_members \%>\% full_join(band_instruments) # To suppress the message about joining variables, supply `by` band_members \%>\% inner_join(band_instruments, by = join_by(name)) # This is good practice in production code # Use an equality expression if the join variables have different names band_members \%>\% full_join(band_instruments2, by = join_by(name == artist)) # By default, the join keys from `x` and `y` are coalesced in the output; use # `keep = TRUE` to keep the join keys from both `x` and `y` band_members \%>\% full_join(band_instruments2, by = join_by(name == artist), keep = TRUE) # If a row in `x` matches multiple rows in `y`, all the rows in `y` will be # returned once for each matching row in `x`. df1 <- tibble(x = 1:3) df2 <- tibble(x = c(1, 1, 2), y = c("first", "second", "third")) df1 \%>\% left_join(df2) # If a row in `y` also matches multiple rows in `x`, this is known as a # many-to-many relationship, which is typically a result of an improperly # specified join or some kind of messy data. 
In this case, a warning is # thrown by default: df3 <- tibble(x = c(1, 1, 1, 3)) df3 \%>\% left_join(df2) # In the rare case where a many-to-many relationship is expected, set # `relationship = "many-to-many"` to silence this warning df3 \%>\% left_join(df2, relationship = "many-to-many") # Use `join_by()` with a condition other than `==` to perform an inequality # join. Here we match on every instance where `df1$x > df2$x`. df1 \%>\% left_join(df2, join_by(x > x)) # By default, NAs match other NAs so that there are two # rows in the output of this join: df1 <- data.frame(x = c(1, NA), y = 2) df2 <- data.frame(x = c(1, NA), z = 3) left_join(df1, df2) # You can optionally request that NAs don't match, giving # a result that more closely resembles SQL joins left_join(df1, df2, na_matches = "never") } \seealso{ Other joins: \code{\link{cross_join}()}, \code{\link{filter-joins}}, \code{\link{nest_join}()} } \concept{joins} dplyr/man/src_tbls.Rd0000644000176200001440000000076614366556340014302 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/src.R \name{src_tbls} \alias{src_tbls} \title{List all tbls provided by a source.} \usage{ src_tbls(x, ...) } \arguments{ \item{x}{a data src.} \item{...}{other arguments passed on to the individual methods.} } \description{ This is a generic method which individual src's will provide methods for. Most methods will not be documented because it's usually pretty obvious what possible results will be. 
} \keyword{internal} dplyr/man/filter_all.Rd0000644000176200001440000000632114366556340014601 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-filter.R \name{filter_all} \alias{filter_all} \alias{filter_if} \alias{filter_at} \title{Filter within a selection of variables} \usage{ filter_all(.tbl, .vars_predicate, .preserve = FALSE) filter_if(.tbl, .predicate, .vars_predicate, .preserve = FALSE) filter_at(.tbl, .vars, .vars_predicate, .preserve = FALSE) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.vars_predicate}{A quoted predicate expression as returned by \code{\link[=all_vars]{all_vars()}} or \code{\link[=any_vars]{any_vars()}}. Can also be a function or purrr-like formula. In this case, the intersection of the results is taken by default and there's currently no way to request the union.} \item{.preserve}{when \code{FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise it is kept as is.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=if_all]{if_all()}} or \code{\link[=if_any]{if_any()}} in an existing verb. See \code{vignette("colwise")} for details. 
These \link{scoped} filtering verbs apply a predicate expression to a selection of variables. The predicate expression should be quoted with \code{\link[=all_vars]{all_vars()}} or \code{\link[=any_vars]{any_vars()}} and should mention the pronoun \code{.} to refer to variables. } \section{Grouping variables}{ The grouping variables that are part of the selection are taken into account to determine filtered rows. } \examples{ # While filter() accepts expressions with specific variables, the # scoped filter verbs take an expression with the pronoun `.` and # replicate it over all variables. This expression should be quoted # with all_vars() or any_vars(): all_vars(is.na(.)) any_vars(is.na(.)) # You can take the intersection of the replicated expressions: filter_all(mtcars, all_vars(. > 150)) # -> filter(mtcars, if_all(everything(), ~ .x > 150)) # Or the union: filter_all(mtcars, any_vars(. > 150)) # -> filter(mtcars, if_any(everything(), ~ . > 150)) # You can vary the selection of columns on which to apply the # predicate. filter_at() takes a vars() specification: filter_at(mtcars, vars(starts_with("d")), any_vars((. \%\% 2) == 0)) # -> filter(mtcars, if_any(starts_with("d"), ~ (.x \%\% 2) == 0)) # And filter_if() selects variables with a predicate function: filter_if(mtcars, ~ all(floor(.) == .), all_vars(. != 0)) # -> is_int <- function(x) all(floor(x) == x) filter(mtcars, if_all(where(is_int), ~ .x != 0)) } \keyword{internal} dplyr/man/dplyr_tidy_select.Rd0000644000176200001440000000574114472225345016207 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/doc-params.R \name{dplyr_tidy_select} \alias{dplyr_tidy_select} \title{Argument type: tidy-select} \description{ This page describes the \verb{} argument modifier which indicates the argument supports \strong{tidy selections}. Tidy selection provides a concise dialect of R for selecting variables based on their names or properties. 
Tidy selection is a variant of tidy evaluation. This means that inside functions, tidy-select arguments require special attention, as described in the \emph{Indirection} section below. If you've never heard of tidy evaluation before, start with \code{vignette("programming")}. } \section{Overview of selection features}{ Tidyverse selections implement a dialect of R where operators make it easy to select variables: \itemize{ \item \code{:} for selecting a range of consecutive variables. \item \code{!} for taking the complement of a set of variables. \item \code{&} and \code{|} for selecting the intersection or the union of two sets of variables. \item \code{c()} for combining selections. } In addition, you can use \strong{selection helpers}. Some helpers select specific columns: \itemize{ \item \code{\link[tidyselect:everything]{everything()}}: Matches all variables. \item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset. \item \code{\link[=group_cols]{group_cols()}}: Select all grouping columns. } Other helpers select variables by matching patterns in their names: \itemize{ \item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix. \item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix. \item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string. \item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression. \item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03. } Or from variables stored in a character vector: \itemize{ \item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All names must be present, otherwise an out-of-bounds error is thrown. \item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown for names that don't exist. 
} Or using a predicate function: \itemize{ \item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those for which the function returns \code{TRUE}. } } \section{Indirection}{ There are two main cases: \itemize{ \item If you have a character vector of column names, use \code{all_of()} or \code{any_of()}, depending on whether or not you want unknown variable names to cause an error, e.g. \code{select(df, all_of(vars))}, \code{select(df, !any_of(vars))}. \item If you want the user to be able to supply a tidyselect specification in a function argument, embrace the function argument, e.g. \code{select(df, {{ vars }})}. } } \keyword{internal} dplyr/man/select_all.Rd0000644000176200001440000000574214266276767014614 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-select.R \name{select_all} \alias{select_all} \alias{rename_all} \alias{select_if} \alias{rename_if} \alias{select_at} \alias{rename_at} \title{Select and rename a selection of variables} \usage{ select_all(.tbl, .funs = list(), ...) rename_all(.tbl, .funs = list(), ...) select_if(.tbl, .predicate, .funs = list(), ...) rename_if(.tbl, .predicate, .funs = list(), ...) select_at(.tbl, .vars, .funs = list(), ...) rename_at(.tbl, .vars, .funs = list(), ...) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a purrr style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. 
This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{rename_if()}, \code{rename_at()}, and \code{rename_all()} have been superseded by \code{rename_with()}. The matching select statements have been superseded by the combination of a \code{select()} + \code{rename_with()}. Any predicate functions passed as arguments to \code{select()} or \code{rename_with()} must be wrapped in \code{\link[=where]{where()}}. These functions were superseded because \code{mutate_if()} and friends were superseded by \code{across()}. \code{select_if()} and \code{rename_if()} already use tidy selection so they can't be replaced by \code{across()} and instead we need a new function. 
} \examples{ mtcars <- as_tibble(mtcars) # for nicer printing mtcars \%>\% rename_all(toupper) # -> mtcars \%>\% rename_with(toupper) # NB: the transformation comes first in rename_with is_whole <- function(x) all(floor(x) == x) mtcars \%>\% rename_if(is_whole, toupper) # -> mtcars \%>\% rename_with(toupper, where(is_whole)) mtcars \%>\% rename_at(vars(mpg:hp), toupper) # -> mtcars \%>\% rename_with(toupper, mpg:hp) # You now must select() and then rename mtcars \%>\% select_all(toupper) # -> mtcars \%>\% rename_with(toupper) # Selection drops unselected variables: mtcars \%>\% select_if(is_whole, toupper) # -> mtcars \%>\% select(where(is_whole)) \%>\% rename_with(toupper) mtcars \%>\% select_at(vars(-contains("ar"), starts_with("c")), toupper) # -> mtcars \%>\% select(!contains("ar") | starts_with("c")) \%>\% rename_with(toupper) } \keyword{internal} dplyr/man/funs.Rd0000644000176200001440000000324114406402754013427 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-funs.R \name{funs} \alias{funs} \title{Create a list of function calls} \usage{ funs(..., .args = list()) } \arguments{ \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> A list of functions specified by: \itemize{ \item Their name, \code{"mean"} \item The function itself, \code{mean} \item A call to the function with \code{.} as a dummy argument, \code{mean(., na.rm = TRUE)} } The following notations are \strong{not} supported, see examples: \itemize{ \item An anonymous function, \code{function(x) mean(x, na.rm = TRUE)} \item An anonymous function in \pkg{purrr} notation, \code{~mean(., na.rm = TRUE)} }} \item{.args, args}{A named list of additional arguments to be added to all function calls. 
As \code{funs()} is being deprecated, use other methods to supply arguments: \code{...} argument in \link[=summarise_at]{scoped verbs} or make own functions with \code{\link[purrr:partial]{purrr::partial()}}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{funs()} is deprecated; please use \code{list()} instead. We deprecated this function because it provided a unique way of specifying anonymous functions, rather than adopting the conventions used by purrr and other packages in the tidyverse. } \examples{ funs("mean", mean(., na.rm = TRUE)) # -> list(mean = mean, mean = ~ mean(.x, na.rm = TRUE)) funs(m1 = mean, m2 = "mean", m3 = mean(., na.rm = TRUE)) # -> list(m1 = mean, m2 = "mean", m3 = ~ mean(.x, na.rm = TRUE)) } \keyword{internal} dplyr/man/n_distinct.Rd0000644000176200001440000000205614366556340014623 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/n-distinct.R \name{n_distinct} \alias{n_distinct} \title{Count unique combinations} \usage{ n_distinct(..., na.rm = FALSE) } \arguments{ \item{...}{Unnamed vectors. If multiple vectors are supplied, then they should have the same length.} \item{na.rm}{If \code{TRUE}, exclude missing observations from the count. If there are multiple vectors in \code{...}, an observation will be excluded if \emph{any} of the values are missing.} } \value{ A single number. } \description{ \code{n_distinct()} counts the number of unique/distinct combinations in a set of one or more vectors. It's a faster and more concise equivalent to \code{nrow(unique(data.frame(...)))}. 
} \examples{ x <- c(1, 1, 2, 2, 2) n_distinct(x) y <- c(3, 3, NA, 3, 3) n_distinct(y) n_distinct(y, na.rm = TRUE) # Pairs (1, 3), (2, 3), and (2, NA) are distinct n_distinct(x, y) # (2, NA) is dropped, leaving 2 distinct combinations n_distinct(x, y, na.rm = TRUE) # Also works with data frames n_distinct(data.frame(x, y)) } dplyr/man/explain.Rd0000644000176200001440000000311314366556340014120 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/explain.R \name{explain} \alias{explain} \alias{show_query} \title{Explain details of a tbl} \usage{ explain(x, ...) show_query(x, ...) } \arguments{ \item{x}{An object to explain} \item{...}{Other parameters possibly used by generic} } \value{ The first argument, invisibly. } \description{ This is a generic function which gives more details about an object than \code{\link[=print]{print()}}, and is more focused on human readable output than \code{\link[=str]{str()}}. } \section{Databases}{ Explaining a \code{tbl_sql} will run the SQL \code{EXPLAIN} command which will describe the query plan. This requires a little bit of knowledge about how \code{EXPLAIN} works for your database, but is very useful for diagnosing performance problems. 
} \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ lahman_s <- dbplyr::lahman_sqlite() batting <- tbl(lahman_s, "Batting") batting \%>\% show_query() batting \%>\% explain() # The batting database has indices on all ID variables: # SQLite automatically picks the most restrictive index batting \%>\% filter(lgID == "NL" & yearID == 2000L) \%>\% explain() # OR's will use multiple indexes batting \%>\% filter(lgID == "NL" | yearID == 2000) \%>\% explain() # Joins will use indexes in both tables teams <- tbl(lahman_s, "Teams") batting \%>\% left_join(teams, c("yearID", "teamID")) \%>\% explain() } \dontshow{\}) # examplesIf} } dplyr/man/src_dbi.Rd0000644000176200001440000000472614321267312014065 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-dbi.R \name{src_dbi} \alias{src_dbi} \alias{src_mysql} \alias{src_postgres} \alias{src_sqlite} \title{Source for database backends} \usage{ src_mysql( dbname, host = NULL, port = 0L, username = "root", password = "", ... ) src_postgres( dbname = NULL, host = NULL, port = NULL, user = NULL, password = NULL, ... ) src_sqlite(path, create = FALSE) } \arguments{ \item{dbname}{Database name} \item{host, port}{Host name and port number of database} \item{...}{for the src, other arguments passed on to the underlying database connector, \code{\link[DBI:dbConnect]{DBI::dbConnect()}}. For the tbl, included for compatibility with the generic, but otherwise ignored.} \item{user, username, password}{User name and password. Generally, you should avoid saving username and password in your scripts as it is easy to accidentally expose valuable credentials. Instead, retrieve them from environment variables, or use database specific credential scores. 
For example, with MySQL you can set up \code{my.cnf} as described in \code{\link[RMySQL:MySQLDriver-class]{RMySQL::MySQL()}}.} \item{path}{Path to SQLite database. You can use the special path ":memory:" to create a temporary in memory database.} \item{create}{if \code{FALSE}, \code{path} must already exist. If \code{TRUE}, will create a new SQLite3 database at \code{path} if \code{path} does not exist and connect to the existing database if \code{path} does exist.} } \value{ An S3 object with class \code{src_dbi}, \code{src_sql}, \code{src}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} These functions have been deprecated; instead please use \code{\link[=tbl]{tbl()}} directly on an \code{DBIConnection}. See \url{https://dbplyr.tidyverse.org/} for more details. } \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") copy_to(con, mtcars) # To retrieve a single table from a source, use `tbl()` mtcars <- con \%>\% tbl("mtcars") mtcars # You can also use pass raw SQL if you want a more sophisticated query con \%>\% tbl(sql("SELECT * FROM mtcars WHERE cyl == 8")) \dontshow{\}) # examplesIf} } \keyword{internal} dplyr/man/src_local.Rd0000644000176200001440000000137514366556340014431 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-src-local.R \name{src_local} \alias{src_local} \alias{src_df} \title{A local source} \usage{ src_local(tbl, pkg = NULL, env = NULL) src_df(pkg = NULL, env = NULL) } \arguments{ \item{tbl}{name of the function used to generate \code{tbl} objects} \item{pkg, env}{Either the name of a package or an environment object in which to look for objects.} } \description{ 
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This function was deprecated since it existed to support a style of testing dplyr backends that turned out not to be useful. } \keyword{internal} dplyr/man/setops.Rd0000644000176200001440000000416114366556340014001 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sets.R \name{setops} \alias{setops} \alias{intersect} \alias{union} \alias{union_all} \alias{setdiff} \alias{setequal} \alias{symdiff} \title{Set operations} \usage{ intersect(x, y, ...) union(x, y, ...) union_all(x, y, ...) setdiff(x, y, ...) setequal(x, y, ...) symdiff(x, y, ...) } \arguments{ \item{x, y}{Pair of compatible data frames. A pair of data frames is compatible if they have the same column names (possibly in different orders) and compatible types.} \item{...}{These dots are for future extensions and must be empty.} } \description{ Perform set operations using the rows of a data frame. \itemize{ \item \code{intersect(x, y)} finds all rows in both \code{x} and \code{y}. \item \code{union(x, y)} finds all rows in either \code{x} or \code{y}, excluding duplicates. \item \code{union_all(x, y)} finds all rows in either \code{x} or \code{y}, including duplicates. \item \code{setdiff(x, y)} finds all rows in \code{x} that aren't in \code{y}. \item \code{symdiff(x, y)} computes the symmetric difference, i.e. all rows in \code{x} that aren't in \code{y} and all rows in \code{y} that aren't in \code{x}. \item \code{setequal(x, y)} returns \code{TRUE} if \code{x} and \code{y} contain the same rows (ignoring order). } Note that \code{intersect()}, \code{union()}, \code{setdiff()}, and \code{symdiff()} remove duplicates in \code{x} and \code{y}. 
} \section{Base functions}{ \code{intersect()}, \code{union()}, \code{setdiff()}, and \code{setequal()} override the base functions of the same name in order to make them generic. The existing behaviour for vectors is preserved by providing default methods that call the base functions. } \examples{ df1 <- tibble(x = 1:3) df2 <- tibble(x = 3:5) intersect(df1, df2) union(df1, df2) union_all(df1, df2) setdiff(df1, df2) setdiff(df2, df1) symdiff(df1, df2) setequal(df1, df2) setequal(df1, df1[3:1, ]) # Note that the following functions remove pre-existing duplicates: df1 <- tibble(x = c(1:3, 3, 3)) df2 <- tibble(x = c(3:5, 5)) intersect(df1, df2) union(df1, df2) setdiff(df1, df2) symdiff(df1, df2) } dplyr/man/with_order.Rd0000644000176200001440000000073513663216626014634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order-by.R \name{with_order} \alias{with_order} \title{Run a function with one order, translating result back to original order} \usage{ with_order(order_by, fun, x, ...) } \arguments{ \item{order_by}{vector to order by} \item{fun}{window function} \item{x, ...}{arguments to \code{f}} } \description{ This is used to power the ordering parameters of dplyr's window functions } \keyword{internal} dplyr/man/near.Rd0000644000176200001440000000101513663216626013403 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/near.R \name{near} \alias{near} \title{Compare two numeric vectors} \usage{ near(x, y, tol = .Machine$double.eps^0.5) } \arguments{ \item{x, y}{Numeric vectors to compare} \item{tol}{Tolerance of comparison.} } \description{ This is a safe way of comparing if two vectors of floating point numbers are (pairwise) equal. 
This is safer than using \code{==}, because it has a built in tolerance } \examples{ sqrt(2) ^ 2 == 2 near(sqrt(2) ^ 2, 2) } dplyr/man/group_by_prepare.Rd0000644000176200001440000000153114366556340016026 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distinct.R, R/group-by.R \name{distinct_prepare} \alias{distinct_prepare} \alias{group_by_prepare} \title{Same basic philosophy as group_by_prepare(): lazy_dots comes in, list of data and vars (character vector) comes out.} \usage{ distinct_prepare( .data, vars, group_vars = character(), .keep_all = FALSE, caller_env = caller_env(2), error_call = caller_env() ) group_by_prepare( .data, ..., .add = FALSE, .dots = deprecated(), add = deprecated(), error_call = caller_env() ) } \value{ A list \item{data}{Modified tbl} \item{groups}{Modified groups} } \description{ \verb{*_prepare()} performs standard manipulation that is needed prior to actual data processing. They are only be needed by packages that implement dplyr backends. 
} \keyword{internal} dplyr/man/figures/0000755000176200001440000000000014406402754013631 5ustar liggesusersdplyr/man/figures/lifecycle-defunct.svg0000644000176200001440000000170413663216626017746 0ustar liggesuserslifecyclelifecycledefunctdefunct dplyr/man/figures/lifecycle-maturing.svg0000644000176200001440000000170613663216626020146 0ustar liggesuserslifecyclelifecyclematuringmaturing dplyr/man/figures/logo.png0000644000176200001440000012370514406402754015307 0ustar liggesusersPNG  IHDRޫhgAMA a cHRMz&u0`:pQ<bKGDtIME6 IDATxwx$uUhsI,fRT"mAeAao^|k[Aʖ-˶2E ")a89FTu?fHbtW׽Uo#&oy衇 V?wy@<#WTID\&/6t?Е(F^_ .6\_ǁ,5ߠbuϫIx3{ *"sy?u>N.98:::vgi/5o7|8{r2dttadUCWW^u,䭕kB Ͻpz};55 pxSί>Z[[+s3cik B-"!W+daafffu}SUQ' M_855o Q=vblll6t:줷߿8emq7A>A;X=vs9N366xKgg'nR5 7CRDϽ24?;4Mcff-P(D?ͨKHx?~kMki}4QOJI8f``i4M))B?uuua1h_^ru/SF[=v͕H$,?7N_T$nkjjV Q-N˓k )w#2 ŮR6$~>:::p\_ -~k˖kހނz}q,W\xijj,3|?hk^"ch]4 ?nT`ΝSOU55p8hmmmsk 5W Em@Q=6[\.I:ӟ4Z?o h/ymQ \RD{3@cכ7(r98}4[uEaǎ| !t:I$J;" Wn׮]?~(4p)A5@զ%mmm}x嗪r9>Od6<466{n*}[#Iהk.4H UDF ^o ۸;hll\R( vmQ+|)x .^ mI项ӨMRLLL0::J*~Z[[q8ƈ?<5ڢ%4>lE)(>|SOpĉMsMxt]gjjOFۡ(G\Z ߯ROQu?Lss3`D/G\.d߾}|K_"0;;K,###|3Ao!P(;::dE"LAfffC[#D p. 4?W܍D:Nkk+.kCl[[y{ؿ?\'|H$XoMKK .}tIFGGI$>f<LNNx['gz0 ~x4?!eɤU444_9x BTU!4l۶ ] B155){r蠳k=fggJoaa]}oF[ߴ.A(Fr) 3::! B xffD"aU5[199iEkjjhjj:UUjkkQ]YZZbxx꽺4Mcbby驄Xq}߆oRJx}a4# ~n6ettӧO355uY@(  >|Zb1^}U|>>((.]^׳sed2 .\`iiiR`}>ړU|FFHZ%h~*όV733C2(9+DY:;;RDMM XYnooˑhkk6!f2^]hSNYqEQ1?~Cho ;)Lk\}_ZZbppɪ?aY`v9zhiLNNr!QWWG,#hMVhiivot:iooϝgxx+aR277G8>ZxpiLf%h~?ޫH$ )%~0@n _}!TjY@ (Bcc#===YKKK\5rx<jjjp:4bQމf @~ s-1s'''Xs刮VNVJImm-Pp8yߏT*ECCê&Jz@֫j}=XlI0&1L2>>~U2k{zzYLD@oj?f|#p.݈d}kLsg劢viiia``%rMkk+.\({E"eֶYkD4Etƙ3g<7Kgg'x,O>dUljv+F׷"UN"JGSS$IfxS(m.۳ccceYaf+p8eāQrA/z衫xUx+h~b555(6"yٳ)%G#,Ky<\xhmmepp:i{糧{Y⢍eQߕe型;Y2큶RM=]]][̦T*e+-$wU[U }I(ͯX}c9)Ce;NxW&Nxp8YfCQv߿FqREբmYcmS9? 
-,,=̇<^w#wch㯐-^- .AFMKԄ]Av-h2o0޽{yW'Zڥ EQVQnNnӆfMd2x<h匿ˢ.6Ks9?8`ڊ JP(A!(YC=tUL[b.2Rկ>;opp f飚rVCgϞIM,cnned2,?n)f~inw>FT_^cm>dV3S㌏.M9N[=LڢYY!m],-f{O5i~,BPSSCss3+^= 7܀Ge>Z-]C255޽{2eZ^kSUU(G婧bjj]vdp\8Nك4?{TJY`e+OTVv[d{p|hU4qC[>A[4-N]) o)h~4Xcu||WMMM-Qږ -ooi $;(hԺ%?\n}>IFGGQVYE0m%a38ǭ\Ckśbm%mqS\gN+Kof%3??OkkUxH&kˊi<:ujYTN:s@Vcc㲇 =~;⋼+ ݶm;hRXƱ_R鳖+(͵g2+8WMnֶ6`dN0mqKh"6R\mSrN2}T3L&$N[9q=ZR̢ۙh4MMM+7yvQŚsNvif$J_*%iZ=Vg5Ý˸sss7y$1::j+H4is vǫWJ f4sssb1sx''377gfv V %I0XLtΝڵv73ܹsW~8SjZ3Pwww q\[KR?)5d'Ճ1|ShUp߻1V+777ǥK\Nv;\=uHlll\=l.cbb#69goݻɺ1xO۲*%h~eYaf~&' +tɑHǏ311]*hSXueϜW5fYt}quYeպuǎ l=%''-3{ 7X7=L&y7yj8v$###% Jjjj,n7gYSUz%L[Q-#Fj x+h~%s=9r_L ;^444{V9 AjEX)TUU+_ Xgs̙+b:oWg-~j%[-BBu}TUE4"DJ- 5MOOo)pp\^@Jouthhh`vvYP1NK-+wk-4a$"Jq=t:ygz˕jao6K&F}WZLêr5aW>p^{͚&Q__VYCſdŊ\d.wRu{q !Ƀ>}ݷ^Opv]w݅rٳgYZZ"SWW>p뭷 V-OOOSOsϭkVJBSOY]&lͯ=N6[y(%;Cʬ2r'''%%)cV W--fj55m*bzz>t]</OH DA<)%/ٳ͢uuu|h_dEhll\v}ƘHd2\m )L[h:^i~[F!8x .2ogggO|3e {fՏ=l'O'ҥK+n7"ϼuE+峚\LV0.w Ehj }0l_5pXMLЧ?iJLm߾J~+k ҥK<8qbXnVTWM淕bI9N[doaڢ4?s]躾 TBULp}vv! 
c /֚wweEQWzmˠ-"F59mqm+h~asVL󛘘`ppjnn5544zXL]paY !<+|R~ԛgJ6J )7¼>|žekƌ?F[tbǫxyŢ*w{r9{ymѫMF-׻!uPDQN< eĤ-mπz!́4a%J!ft.`.^>lUzzW4RbOTB3r>kf6*&'۪Pd[45Ĥ-3W {bdd"^D`L4 qsmY\{=5@ X={n5R3_-ETp5N0]d2TUEUJAe[788xU!#v6P;vpѱ"dsgffcMOO[3TUeϞ=>}ટW)h+E+jjR[[Kww)WK=Nb/Ir躾iݫ.jޜw5!+D?s?G[[۲f቙XaZR)jkk oi~o6]JUz٣Wr*Ii3-گW)) `!xǯd=5r@2ŋ<Ze+$/|wB!FGGygVvYhhhF+%MLLYl4+!v\Ij5Eb9qWt3-&!?L5 /~<ȩSͲfrUp?PLU2Jv38 ׸b*ñ~(\nݕDQX,Ƴ>{Eοj4R75^.G~n%bll~?e墲WJI/7:2 `][qH lɍ)|sqnR*rؽ{wp z5VROZ.f|l12~AWCg--DVCYff!J\)kOtlʐD%J+)I]J===e ŝ&LcorDW󩬛DJ \&FFc1m&AQ9qϣХ$N75uzY-D.X.D.ˬ#!7֥\1'f3(n?tBgL]b5Y< +JpJW\[!H$#q~ᣧ)D(B*uuw8ۿ@FJ"d.L$If,J(t<0#$It$߆vӋk <[e(7=Vkvd5-P&GW2MO,Ic4NHwQPeg,oSBШ:hR\^YǜKE5pYvJi #E ]7k-As.8w&F7JR+vR"iF$2O :HI]V/bg,Ig2?JDA6Mr :&cXuTc)9)-M A "1*)y9_" ;GKb-\)/H[X@:Yjب盋}C{zMJ"]j׌Mpq7w#˾goYbr>ntS\ӓH H@vS+%JIo^EK7)nl`>>w<R,WCZ5AyS>Pz# ïznf[Q P)d.йh#bd.vQ`^$t>;<A&'yynm Q߅3kv.O D[)`Yohw(5`-zŻAGg56N^晧RqV OESܲ/Zi"_1]|*# $I~v*x!Lri~|AQ ahߡ /G*k?KGL.4?UZ)QPd-oR Hi6n}aob,sYM,,D-naMFjZr*w$pQ]}_߫>&&8%ժIx t_f& b*Ϣ]p)Fd'wr'@E6_‹ 3e_[֧Sy|tvKk;}[RpkԤ$StoV`zz뭫h~Ĵ]OgrNa\>|<ȃRVx[h+|Y]ͧ2Tw-?eeXp,`H}O^%5BJTa.Bqs7 .R"v 0u0zѕiNziGAaIT\w 5Rbj[xqƁ5|* 鎾x΄L\JTMJ^I'8*hE~a7Z(< tiW3oMڵi9*9TS=V:n$Dև5xCf7(U #p/ln;/)_"*S̊\0YmK=@e.fbt'SKhҹ/(8 v8Xf2钯Hzvh~%vho"At{'6Rd5Hő&}={CoCpen\d|WJoυފpƼ><Ȯ-nj JY]rB"u}ӴUL%u9I2hW RK_-G?C%XJjv.5ɴA/uWg_57siWtSV$:Iעy\H셭h4xg@8OSTVFa͐Dt'KIA<9+JR ]:-ߵt@V"w/( tLV|{|\F+|=>|Ym4`ú@Q3: wE*.@w-ƹ1>U5k3G- ZnUBp)D*^J5q"bx"Wx = @ӐtlOh_{wm'@ԮbڵcTO*~gͯ Ý0-AԥD7cZxmI㘢h_<(Ld@U %u<F>|d̟zNH@8pq+.EkK M!H˦]ZH7ɕԮ2s,f_Sѻ^%roO hg'y=ǁK(sQd@G0%zϡ+`41ޥ@ G|%<,'"d*}7>cg"4+pC˞To]I5 ]ˉBNNÿ=<}Azȓu}Kɖx4JVp/4W*M8{HMt0{LXnVeS\F_-um=_!Qѹ^$벑i|6S~5[4q^e^IESP5@TT]EՌ|s9”`aͯ׷Т:6 E3Zܬ&~[&ڼݹg&3Q7]CoGO,Z@릇YjJџK'p~>H$ahZwހY%?^BU75>#(>C|ު(6ՠA+mq'\Iy\Bge ‘ cӜq?־HKg3ev,dF,8xW;iϸ^sLIBp'c(Z Xx ^gٶvL'$5}Cms%m|佈/S!;/) +_:/"?5 λxuK=}RE^ r4X,fKhc-Mvs;>-$#St~p4kU*B)aMui{gs3( 8_-]LJɟ<[U~'q~ aant~p/}gu<[.7*yƓ?ArmPX  2[DeS.զࠛ}m 526M /24NNE\Yq9Kw/stll&K >z;kk6zHc]E.PucdN7絳q׻iWH (fj#&ulY,lUf # x// fCqs,3%U ܴ˥4<6&o,5Xߗo^y )PkƖ:>4#dQ}pG݂;Z 
!GL6rr*yfjK*mpmEj":Vo,"TGyߡﯫ Vd4QL31mD\xanni^eyH-兓p(nClSH &9WmX$ϫ/V7աuYq#~CU֑OVtz=z$BURkd27^t3h~tQnZI\> dUnR5 sg1R)NU4"_Qp =Fe%hGǃd323$tRk\?KfMm:az,H ߚ$|L򩳕7iHF/à *ĨZB>teo퐑~ڞEǕ&}(RYEGFFp/Zܽrh~*j.Fu'_fumthu/ʒS-@@=)nD7ax5ɟ䪥|nJDo @{Occ5"@MW/]}e\[[˾}iq(t,zC+p 5,5mmm*T7[ #>; }uL.|r)ul;6f{3R-?|]puo؀/£]_Fؔa݁;'ދpR.kWWqxJUr#.GvttDXͯtЌPb\`7Y|H:dq^5 3~c4*-f$JUҼ~kͨ*K]A5Ff*"C{ y>x}0: !iG|S/ GԼƋڭP(Tr] &/N{nNp $hy 9| d"^hp=ЪdoI~%ɿT5ژ8ȴ ^+催 -Hpj })$&P3Vx=O;~_EGkD]. 񙢯 z($>3Y<ꋸgH4x~}vig%=IY:sҥ5i~M>*vv+ ZjXH #B}E%Z0㧵+\*IQUᥬ~9bL2텬{\-¡/ E!En)y;1 5w |"XKyA!/+ ޵Ƌ](eKb}4X&vv(ju%Nbߤ.e^FNJ6*4Ԩhgp2w=÷QTb s*, "^ "[kuQ8<'v!>v*1s $ qv MZEmmm%{W%e\J;h]b6Q@,UZy V隀hC6^.SJ-Eih^"zjuOKue#25\z]dkfrwl\ d܃hp kM^XzGJM^AW!70LcếwjϏ@QhB-]mmK_T6n{IXuɕv"qH] {\i: 5njJ{AG~sSvL籉r9iokۨu6οcJX6ZDҍAK`מDWarw=7![M^ HxQdɅ,0uuu< G3xpq^"D(sYxʙϦUU=xjMN'$>'3`niXz/Vz6nNY;RaE0/i qR֢ȯG&܈KȾiD}vp97_xqo6@uek5H\v͗hx˚WֻXs>avtth>&+SԕH藧~TID Z_2nStWdN1ӓ:ͬlgbE߇0oȡY s=c&B'&+kAs.=Q`ji~ @=d8i'bȼ&P$|ȯ?ؽC7-PpFWi φ|ټTbUDu<=lf"rC(BǥSn^Gmf<&ZҺ=x6?7jfH1*ISug@ Zkh!q7zu0{ͤ{Lrr 0@ba8Di h.?y5#wN )?b&'vY Z|9 ˚Wjra\Whz(%4qpc4U& %M.e+( ]<ͷ]lE %.h@0EguMq!W@:'9: dR~ [ZQAh m Ӵ?pk'9dB^V %X2AAw9pw]7!N6!@osvVKa{ d[Lc5)sssfaGG䤎΋Y+I2ZEHܪ/ ^ vaO`G-@ip2lkj]S#B["n-'n8-|qblڷ ~ߊ q0QwHv@X{+]@'oz˞ t؇ucCԚ<6$f5z8]_{ `Ӝ### d'9n1fBf&$YU'Ԉ;sD9S0GDdE2y;N4:D0qIhs0$ \d7-hں$ 5$ l#a#qW$A~KWya3i"h2~ejR1.O ΃m;\nuF|v,z)z1^byEqk H[dDbcLlv0+5H1MQ3h5ăuH;5bZѩ FGHe61&׳q>tZu Zh\'ӓ  c禍ԳӒLX>@h\ v߭ !ڷ-. 歆kuXv+L%zm-,ʖVQmHt=X,A6;M.M`{4S21,3 @ԝ%Hkg;1#H4̘jK!QK* J?7d!#0/J24x-E4(Qg~h9!]XBH2f3I@#P8o!@j "F*-ɗ/Zּ^Բ|ִfXvd:;R -\1Y~y- ɂL0'जs'(BAu"333+*\vg4*ׇyt<ǫ'Ԃd(ƛyw"?6 z Rؿ>U[Fxd}aԋT"-fQ^c~KC ,ٛ\=-$)tDQ\Jw2?,ȉ -*AS$aOsYIXv1¤X~ך=\F{pU*ۜnTgUX;VdD:W 3܋ AWpfW.)pY_ÜrdBKhǙ]A+`\N݀Ċ'}?ʹsд[`9 Iĝc>/$/rrvXv/SSSCww\&c=+<5(? *3I d#ڠNDny'q7P,<|bk,m9ocJo^8~qH#Y&ͯQufDhi݅~שF =Mۙpjmpn .Y (p:'{.s2G_|Bv;j,ñ0se0?wBqmx濊|` V݌ЁA;)ʅ5y8}&OGq~kA-Uu ̷p?i֗EfXUB2S\S3&If㋌ 0;3^O\Jv;־%#Vd"P{x#øzuk_ Z D4t0}#joP  ljG{POi3R7+ "RǗF_ ,bz)8rxu_4S}1k#02Bu(yEz  f9{Z5۲%M946E81h0AN(RD. 
!hll5LMMڃkwԆp a:*xtq'EZ5AvD,VP< Rb))uIXS[7'_8GOdxpn^1F58K`v/h~cc-Im>R"tO )Mՙν:}GVARp847#kalllYSH6s "\qr L[v lURu7[\.mUWb,}5y$* x0dLh] ,c2"ã9];:u[.5\q)I/#fO5DQNT@X,f->!PB`?߳Qc5]B=|uj_cǦMRnvpr<VͿ̓hi6u_sp~nl'W2+V9XafQӣlaƤ͉ FIt#LODHi'ft )u9D9AOOn"r-EUUNmmXH#H|>ÁJ/b*ɓ|Np a%Q9EL߷1dHG*E냴=" /5* DQ 2;}C :p5 Gᕣ0=cVlͯ~P+\s)߃N-"JE+j^#*Ql:B*t#"St 5V4EL8 d C* %ӧSG~nqE`'TN@0MaE1a9o,\YE{bOQgPҙb7-q=5,WJTtsMQ689QJ5+Qs8̝lG/[iU "SL,!kFHxc|5#$7@K? үc :ma`Men'b FQ@09Aƭ?λgngQcfypD#4?:xV73Svx<ZJ8sL`jjj,kX ͔H?RJk(V S~h#Wtu*n7 M1,@LG\ӕ3ƢӨpxYjqXz9'D˜\SI&v,l hw(rHIUCcU(h\<-&Zyj{oA֭\]1x( \@kmmEQM\p5M#XVd ુW6l16Wwkw re;o_w"y-)YJo>6]X8#-7Lh/N㈛*f?+= ZĘ}אKByʰ!x۟-չ$ڏaý24Jm HMV>kf|֧_{/.0ܲ4?(4_3 ȿl9o&_qX(e WӪf*044c޽%_[^۷[ph~4Jk(MMpt0WzSï")m_b_t/;~&dѤez&YHAj$@VPDsUgbcQ+Hl)+ڧ/~LJ71KӯqS5' 4r~n"gOA&8qDn=Qk[2߶K)ihfZJnXH,t*pr<йQs~^ƭ/o6_<UOPI)Km=)<23 QH "4~LCn G-@(#Ϳ%1G@ Fq:P@4 <0+i&[X$ٻ /V tKc/؛%===8uM\ ,ǝ_s!N3::zҺF`d2|$Od B4pONI"$'T51Ȁgq뫛;Mqb1O'i\b)rScLOT9PDo>9 8d$GI s] X+bv2AR2&p5McvzI 4?E?ei?w) 'c^;M,O\F&/%e<22ŋzlD^)WqdYZ011A,Ջoܮ/G噷{rm56J$ p85bZY1(Bdd Rufg6 j/n$ 0XDoKf)- y<:su˧M3i~w6b ˩$?^Y4|>b1N8al]%RU񻺺({e|54?UUѤ䥋nFg'- } :?d߼y3a*Wygm]=&)qHz-p"E))c8DUl?y#jKmԉ"ղ 2rh00C)QVSg ,T|5c#֓h~ 5`.ix+Ƀ_MdžHWRV*!D"dOl߹:Ss ^s~|0gBT>n şK@+8K*hP_"+Q͛Z,r{b5 0 r#(UN (v%~%%Kؿ<͎݇ P"VzXic/Å +i~AxC]qӓ/Y) Y,5*~\[7JIqLdW+^hqD"M/~m4|6O6 9"*giR^Q} Af As\qrKNElssusخM6hG\@:j0s;ĿQMcS'X7h~]k?F#p-%K}}=9sP# v)/!^znYzbrEThI:_dA6eWzy! 
4}z?_"U) ~'#L+NjmctAr`Z%s(Bn g+OUf`yځ^7= !!/&;Gwg'mq+'7aVDZ`f:usDOkk+DӧOhj7KEW[O?25.͝A.xyLa5dY7i% Z+Vs; w!z/|atRS>ͷ Bh -4("_1dkKVY؊Lj^q+d^j"N PtMMM)/be<'cwW.}5r֣(8\.BZp*,W|fnj,PS6p'\]WUWk_G.~}U"u&|TiSA} 0wz mkvÚQȭ*GUTt:#Tbhh!|WܖV$#3;.Hoo/555[j79xݸ\.@Er 7Ԗpx~o?qes$M>=ԅ.meved\淮J/nDC{AQ}~җٝ7^!%g;| Dߥ^9Slt6a])Bne9P__󧙭m4>]Ф̝-\9GYw}&߂#F!E"OS@-9g{P~xy4KA^ ׼L[Ji~kJ} ;`1n _BbjJd.^jsG+'qeUm +[]ji4=9+AnTaDyaJh]{֦e3p";jTS>/NW>؆s rhT~]6m곱W YstMGϒ}{(fMK]zgXvmg/e(t/296 VxNhh~Yof6u!.ɑiw0A^cT(~DoO71Vt?o ^VCwDhxC_K#2U)cyhdQ H uM+TTNbS:TM f?̌~P5b3i"CP9K=zc r7i~Ĵ6[qTv.ޛm,^BnsfC(x O6Zl%U}mFwqnn܊IDFmK/C⁹P~ٵ.V<3˱64~)>#]ˠk?>YM(9=Iލꦃ׼T^9N;pXp29{VN0BoY^щ㥭>A8ٛ$DfꂯO0qV}m/лf-2 C+Pgh/t!R du4E &]q4ff6-ݦM/h 4T4nz8]G)p(Iҩ }1[> w7Ә7MN6i#KAFWHԬR=Pb}{IWײk9eE8g=lB\zϾύ|"z F^o^ۧH55iRҾD϶Y%lL&D禷J $..dl*v|&9J\NgrjIB%h~eZgh+M3\ڬo?nf_עOĻൡ_y׆Bs*B| (&, 7t'7]Et>t9GGGf1s+W^CzYvΡ4ͺk/W )!Bde ٟ!شh \]Yt G 4?r*__wdͦfsKmKanl Lxh7?B4?ի2L受ՆC$7/#Ȑ'Uw-Ƀw4׍ͬ/\m]Pc\N#ΒeO nN,˖7#!ѥ܇o5ΧDA ۜL*x>+gP:Pg ecb2I5@cڗ*y(q :]I4d^+q A e͕o"!ԠLӮ>F_*3d9&'&h~4?֠M~4JofH) o͎!PΗe k|-Hv9 oZ-x=Nv-񾛇e,^J?d_2Xϵ^*$͎,ĹAGM\Hy됲ݻ zj_GƟaWܨm,k\I$_3XJBfc,)>q#dŨI8QwYf:iLjU,WMQv>:˅ &bqiZV2)EOG}w&sg<|X~~n~{ǮU5vyrÙA񄟘Z빘⡇ZaJ)fE[K#|h>[nkmK#8_7΄i6˂TIR Ъjxh=Vv䤟ُ3櫶W|r z[N3<=OXM*4#?uwPޟ垽Vtz\inEߝ£?qH8ęd =4C7g5˴UU-+XV\.gE@]8z] w.6DGl5+p!#ƙ @ #hUIg&n p)Q{ *BCy͙s,;^I4:<1p}Z'L54 fxM]}?4>tL=u9x|gA{f <]Q7L5V e_݇xp/M5)DKo$ɠK*ݳ#rP7lnxr?`u#29CdO3Z܁/g6n8o٬1w _gg'mx]j/s=ț0wL6g&2$+pSMAHq<[UB^}Qv_x\5HvD4 1Z*T>|Z`p.Lt}ou2#"5A0,-mph~4\4?zyA?0Y {k|mEn,Ut—B ]Y^Kr }N>"ҿ(poƵ6-O L85yri~o9Ռ;7w wI~ 0< xvg_i`'ik0lYmh}J, 8'sȭpj-̦uÞ ? y^m t1{/nx dw%F^= +i~] b0mT{}0v Ot>X)oddl13=хew{<;f ~n9 CS^VP$1r MYWE4?s^j4.Z[۩ܴ}<̞0YQLaNDF)U[/܄l4wirfIvy{4kpLnK5o߆-!T0cHC6R}789^ 3307m5&3\}EPe7A|j/bA+%%/kEŗ5`Fц|`9jtU@ Rq yk3,L2z~ki~uA3QÛpo`0x@p[ d0FT_#/HW٤R_wwM-45gs;P`k ֺ@]9R=jM[mctnx5)x*3Lg?`\?;9Qmg֘ҷI3N¡1j"LFy}^ߪk|rb4 2! 
Eh0pΨ^xR4?B@lE0vӑ/RG{)#-._['un>{o N` 1G]/ܔ\osI$mjbFHW|#q|de÷\QzP<|#G.s#'"LCoHp 8 (cFܺi~fӾV;DJs >DXKk$Rhx ]pD`lb4Zx6G5BD*kiʆqZE$sMdgѥle4nA?u;gqk`gה<9aLC8 tw-p?Z((^!Y9\3]'W&XU\9??og Ү&c#\h7/΅SjaSm|8uLh/_@19_n[eWT3z&]!zRja3ȥ=G /W_)ɃGq YpÙ,8m֍鳎f<|K_.;`2x97kCfH;Ì; #7z bқ%$UyFiD mO %45 ִJp/䂯(۔ϱ0}u.GLo#4RU0CLj1{ޑ @I= 'NkM[X4Jkps3EoPTAkf[5 M\|uUi~]]4:{Q:S y!Uкᜃ#u< ӓ7*_DeD{,, I :y/QfNi~k{Gj<-K*1iW|m p:ͨ!T)\>W?7+j2{܉uȚYda[uT{m*(i70eHjl90s ^ytbim흄Nna-_AN`1߀[)Iּ@pv6Cnb7Q-:ZLGs؁=Zۂ ԩw;ʦj<-75n _`]DŽ'ĸ'ĘiwG 5IdiH9?9=yLv\$V^Uò:[NDߍ.-lY{80\X4n?48"Vy  * aVtUG'G"%8, ^3=C=iwhz6탸E Hnp~/69x{pW_)a:4Z4V;㴙bRT`ityۀ< t46w1٬\qΧL'D{\5OOO5v8h3_Y?~%",|s] ؈x<V<6,IN軙b^l*lf,6K!';, + 7B{$ ӂ*З34Mh~]n40:,;}R7!<λM^@kD^VF3n+Xe4Mhj66t7ؽx ٟe^2^\|9mCT]\&IDAT.gttm70NDENX![\4r/ao gD=J-3"75]EC㶌*82r|Ǧ&P_ޮ%޻SF܊9- 3kCNb㻮5FýsIaD'kx8\0oɷm5VA41$jbIs,w4j,]T:Fs2{̏\ͬb0i-i~>+YdIͬv+aR) iKXy:a{ȑ?g00.#{W;4yV׸g;.P輤≛5FhҠM~^GDoK so`0d=98 sI]Na3[Ye!x:|1\ڶV\5kz2Dil]iqOKoWvO7yDXAf[:kc5א JKj4I1yF^r]m1pdf9` 7,ϜH$*h s7`c,jF2}Nvq+#ÑI^y}NƉ 4v(~:=8tC=_* &:`a .4:Js+tɸ{H܅r4?g{ťU%etDe?s-=,j-£ȩpR,VP&h5YC\vb^6f@|s2E D~LZ2v o0kCHAJä@і2h>>%g/J>N9ͯƐ[vޛݱ"LV _.7JJc݆6FͶOA|^8p,SxO۵BR4R6~[\\5"Ϥ *qgN 5M0VO{4m\v]ϼ~+YxQNnz9I haEhi~-^nmZ\ߒJ밂3-4Y$PUܵSz@v5uGxMqngU=K sJ6\`\ѡ}@uAKFk]X3fmvZKo!kd)7ͯZ=FJ2@LeQ 1 z%@[8|$ 0bW#ۻͯZkciWWR>yQ&7EZ5 gةslf"sl{Asaqf6cM;r:1?׃hߋ&ؐ/0lNy(TLӺ֩^-d܆>t#?+` KH yڡj`SlBt`xfvH;p2S m)ufZonr -8{, l5ZVLMXˢ~ RZb3OB|;lO4e5Q}P2X,kRwc+7/OJDl9borVO8T5rIJLʹ}2R?ε0sI4exxxo#/WZWUUTU@[u߄~4 l9T4"-,jYoYDZ@ E~n1RC"{y4eeΔmX낊/47+Gkwߡ0_< ZP9ťlQMy7%U2*R沢Tܥ n܇!esҥ 7"_n)!+=$dK^DҲM: `S͔e@ԧ`".:;Y4355em]~nN0)=R75ineh;\7js\AB󘂚]...  $ǝB<e@˗/3::UUtvtPp Aۯ0mׂL 1kB"ƾ G@_fF]SSS/<~Fe9H$00hqC5- ]u np9-72n~Fpʸuy /zwlR+}L'G3.DFb Uss'5SbdpTImg˥%iyA!Fw-`j7i!a\zqMNvh3{+#wj(t鏚B>Ico,Z:jJɂtm J~"`~8SKh\cbb_J|c~sHp9 @LJJ#WnJwz BvhAL1I_,6e_1eX[@r W###n< h?QT޸V*J|/`Ɯ\Yf)iSRJK<hfg(vfv5Pʀ@?D*gB!悁nԂqM!^!2?PJEQZ`~/98w+o`:Bo39_@ȟ0ۓ=M)KC #p|sF߽mН=0dA{4ŎYjkܞd1Au[3^CAКTdO[s +`8e}rz8עrrG4ģ qLP,cap1sbfW܁^~%C? 
\KHF&8_LXƚݒ}qڸ۔Y\  , 3,Kf,~+LlT:܇𜁪>pM빺6L G$luW}ݾZ)'71 J78k~nfX7\(.)Mxl7߻ر SQP+m|eZV*Ihm:kn6QprLt:G*淦wjB*2+91O)Zׁ{ 5"Z={E156P}9 E1T,|}4̅]\oIf26nm5?׋0N!yS91el;D(DUC!7/#_ fH [_eBlt Mþ&LJpdTp  ers$9|{>ې(XaFDyfqTs{L<\8 oDdE^/SS&ws#cMDUmT?{ǖN#X#Hh6@I "u2s23l2MDU;%fLG] *t%MOz ϡF8"7i0  yF%ģq ט6 vt"$҄~2.}0q]=%c[b>drC/]0Kfj~D"Y<آ!k|<5b0v(T"NBpLU ›'埐O711]["CPWA97մ.*4(D_^G:9;T*iIlTM]Z=m@c#Q ?I'(c#0WX-,_! H:l;;XCD-a20hzÉc7_8N?fƭ >MimE8?1ոNݰҴ8e}~tzo1x5O[ J<{G)`9Β+i`,a\U0pYBB oA\4"T@&"t Oy8Tȕ7 P5;QLsK?8#63s`~Һ"`/q]`ou!~9|kqwE4>-Q>MPe슘6wW‹X-oƕЄ4&{냠w]z6"x7twulLV u70,-~aH#.5u 9J= ߐ1.+75%3MR+|\g1qQhNhK<5Z>%^󗃼Xg~)шþ)ு!C̯P6bϱ0BQ4WFGф,Yann.Y&{OTN1×!A$n,+P-V*[ip~xmܥj {f_I]Yt ޙ 8 h`~_N7q m(م-Ml1+76F0 Ͷ4wo\槫V$6(F|Tƭ&*2}k.n ng/Ld7t6`JMpl?~m15B\Qn߫_ 6q mHâ޸[.9ð9N:Lܠ!_c\* $]ɿ?cOÑY~;)xsDԔꦥ*F3bGko_c5˧1?ؘ l`C)F[V:W!w`+AhsG ؖXWpU9lE66FqR S5}AdHjn 3|j{څn [xŹ1kԮf6tof$_~YLM=n7yaP#GY [5Sҿ-DCa_m\רo% sFc6\qƹj |4d7Pw*6*=8{]2nF6-ނ-._ĚM.hEgyR>ȹWP)1!C=]HU(W7eglKF|W{1747N0B0l>dZآn{ZGGWN#ekb̦h2S &ݼtçCƤ<dS%Aެcuy.&#%{ia~Ue`X4n?^99=mCԖe?3Z*= z14>qnj#5PLs4x_͟a8T}צss01$v5BJ1$ׯ#ZRƤ r4]̡X3@&f4шmj.l&J*2n&蝮O?X1ΝBNN}'pUk`C)iEko\bӸq쭝ZXp $*j^Z8w5>;,͹*{i- wd3+z[1TĹuuh;lI0/@qЄB]^>sMjy({vsjkJ̯󛅫߸*RխbGR>{:}mWsK&]vM(9T;6H߳&1ab~yac{<`~Cb~'o%0z([V[4RiEU֘kFBQ>3@12U60W(m'n`bҟe UUQUuA>25! YdVCnz|''5oӘ_8V6/ ⧐1"iiddi^E<'Q5.rw XΓ'vrj&o1o0Aƺ+NQ x[|?V[B ( d™|x`KNW;97]{ca~Ri-BB,aLlUa>m-45|NX-{)1Bher#ݙoNm_ByptN`8\=*1oj̯P*8-->29?\V;}e_tDБ@W}M<6"֯ssH/J0Bh,d1[|iE8H$ ٜa+Q:BSy$r'ykw;Vm<`~/cb~*^4sYǦq+\ n2ۆ;nޙo-g"J̯P*xZ[|9ޚ+Qqol}[GWiVlO/F޼9ƹȡ\Ř_T4pJ1^$ײآUcxJ4Ž\[Ҹ`D uj;+̛')qhT4p-މ-V::R1Il:#7€"k6xl7s%Z&!1W( g`@&ܘɹ&h-ؾ\0){U%́^"=xZ12T2yb_bRBQŋڵ ds9@nS ᄋ -󔮴H,CJNQEvUpdc^g#k塃R^C-->%ĵb{<rSYd{9#i6~2ķu2*jsgxc~RP Nd||7fVevy{5dyEW¥|B'?hzȜC@}O)S5Ⱥǟ#lQP[m#sf%eYU[纸,s0#hBh+eX݅-nI>Q.wR m-D9. bb~^`ѼU%#0+kx{XB:8-Lz0{"*x-N3>v'G8(b~BEo 1]n[ ߄qhWH)Fށ̭9`r]5qT4T x+rX^V-^A_mX e1r9&x,E [&q7JV7#&Cl1 # _? 
*xh lѨ11M7x`)?DV9(b~TEoR`Db7dxq$}ߦVX)67OiNEn^ |(ćH/U4U$KNSe({5֎X:%tEXtdate:create2023-03-06T19:54:22+00:00r %tEXtdate:modify2023-03-06T19:54:22+00:00mtEXtSoftwareAdobe ImageReadyqe<IENDB`dplyr/man/figures/lifecycle-archived.svg0000644000176200001440000000170713663216626020106 0ustar liggesusers lifecyclelifecyclearchivedarchived dplyr/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172613663216626021373 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated dplyr/man/figures/lifecycle-questioning.svg0000644000176200001440000000171413663216626020664 0ustar liggesuserslifecyclelifecyclequestioningquestioning dplyr/man/figures/lifecycle-superseded.svg0000644000176200001440000000171313663216626020461 0ustar liggesusers lifecyclelifecyclesupersededsuperseded dplyr/man/figures/lifecycle-stable.svg0000644000176200001440000000167413663216626017576 0ustar liggesuserslifecyclelifecyclestablestable dplyr/man/figures/lifecycle-experimental.svg0000644000176200001440000000171613663216626021016 0ustar liggesuserslifecyclelifecycleexperimentalexperimental dplyr/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171213663216626020415 0ustar liggesuserslifecyclelifecycledeprecateddeprecated dplyr/man/figures/lifecycle-retired.svg0000644000176200001440000000170513663216626017755 0ustar liggesusers lifecyclelifecycleretiredretired dplyr/man/consecutive_id.Rd0000644000176200001440000000163114366556340015466 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/consecutive-id.R \name{consecutive_id} \alias{consecutive_id} \title{Generate a unique identifier for consecutive combinations} \usage{ consecutive_id(...) } \arguments{ \item{...}{Unnamed vectors. If multiple vectors are supplied, then they should have the same length.} } \value{ A numeric vector the same length as the longest element of \code{...}. 
} \description{ \code{consecutive_id()} generates a unique identifier that increments every time a variable (or combination of variables) changes. Inspired by \code{data.table::rleid()}. } \examples{ consecutive_id(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, NA, NA)) consecutive_id(c(1, 1, 1, 2, 1, 1, 2, 2)) df <- data.frame(x = c(0, 0, 1, 0), y = c(2, 2, 2, 2)) df \%>\% group_by(x, y) \%>\% summarise(n = n()) df \%>\% group_by(id = consecutive_id(x, y), x, y) \%>\% summarise(n = n()) } dplyr/man/reexports.Rd0000644000176200001440000000316314366556340014520 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reexport-magrittr.R, R/reexport-pillar.R, % R/reexport-tibble.R, R/select-helpers.R \docType{import} \name{reexports} \alias{reexports} \alias{\%>\%} \alias{type_sum} \alias{data_frame} \alias{as_data_frame} \alias{lst} \alias{add_row} \alias{tribble} \alias{tibble} \alias{as_tibble} \alias{view} \alias{contains} \alias{select_helpers} \alias{ends_with} \alias{everything} \alias{matches} \alias{num_range} \alias{one_of} \alias{starts_with} \alias{last_col} \alias{any_of} \alias{all_of} \alias{where} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. 
\describe{ \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} \item{pillar}{\code{\link[pillar]{type_sum}}} \item{tibble}{\code{\link[tibble]{add_row}}, \code{\link[tibble:deprecated]{as_data_frame}}, \code{\link[tibble]{as_tibble}}, \code{\link[tibble:deprecated]{data_frame}}, \code{\link[tibble]{lst}}, \code{\link[tibble]{tibble}}, \code{\link[tibble]{tribble}}, \code{\link[tibble]{view}}} \item{tidyselect}{\code{\link[tidyselect]{all_of}}, \code{\link[tidyselect:all_of]{any_of}}, \code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}, \code{\link[tidyselect]{where}}} }} dplyr/man/make_tbl.Rd0000644000176200001440000000113414472225345014233 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tbl.R \name{make_tbl} \alias{make_tbl} \title{Create a "tbl" object} \usage{ make_tbl(subclass, ...) } \arguments{ \item{subclass}{name of subclass. "tbl" is an abstract base class, so you must supply this value. \code{tbl_} is automatically prepended to the class name} \item{...}{For \code{tbl()}, other fields used by class. For \code{as.tbl()}, other arguments passed to methods.} } \description{ \code{tbl()} is the standard constructor for tbls. \code{as.tbl()} coerces, and \code{is.tbl()} tests. } \keyword{internal} dplyr/man/group_by.Rd0000644000176200001440000001262114366556340014312 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-by.R \name{group_by} \alias{group_by} \alias{ungroup} \title{Group by one or more variables} \usage{ group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data)) ungroup(x, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. 
a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{In \code{group_by()}, variables or computations to group by. Computations are always done on the ungrouped data frame. To perform computations on the grouped data, you need to use a separate \code{mutate()} step before the \code{group_by()}. Computations are not allowed in \code{nest_by()}. In \code{ungroup()}, variables to remove from the grouping.} \item{.add}{When \code{FALSE}, the default, \code{group_by()} will override existing groups. To add to the existing groups, use \code{.add = TRUE}. This argument was previously called \code{add}, but that prevented creating a new grouping variable called \code{add}, and conflicts with our naming conventions.} \item{.drop}{Drop groups formed by factor levels that don't appear in the data? The default is \code{TRUE} except when \code{.data} has been previously grouped with \code{.drop = FALSE}. See \code{\link[=group_by_drop_default]{group_by_drop_default()}} for details.} \item{x}{A \code{\link[=tbl]{tbl()}}} } \value{ A grouped data frame with class \code{\link{grouped_df}}, unless the combination of \code{...} and \code{add} yields a empty set of grouping columns, in which case a tibble will be returned. } \description{ Most data operations are done on groups defined by variables. \code{group_by()} takes an existing tbl and converts it into a grouped tbl where operations are performed "by group". \code{ungroup()} removes grouping. } \section{Methods}{ These function are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}. \item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}. 
} } \section{Ordering}{ Currently, \code{group_by()} internally orders the groups in ascending order. This results in ordered output from functions that aggregate groups, such as \code{\link[=summarise]{summarise()}}. When used as grouping columns, character vectors are ordered in the C locale for performance and reproducibility across R sessions. If the resulting ordering of your grouped operation matters and is dependent on the locale, you should follow up the grouped operation with an explicit call to \code{\link[=arrange]{arrange()}} and set the \code{.locale} argument. For example: \if{html}{\out{
}}\preformatted{data \%>\% group_by(chr) \%>\% summarise(avg = mean(x)) \%>\% arrange(chr, .locale = "en") }\if{html}{\out{
}} This is often useful as a preliminary step before generating content intended for humans, such as an HTML table. \subsection{Legacy behavior}{ Prior to dplyr 1.1.0, character vector grouping columns were ordered in the system locale. If you need to temporarily revert to this behavior, you can set the global option \code{dplyr.legacy_locale} to \code{TRUE}, but this should be used sparingly and you should expect this option to be removed in a future version of dplyr. It is better to update existing code to explicitly call \code{arrange(.locale = )} instead. Note that setting \code{dplyr.legacy_locale} will also force calls to \code{\link[=arrange]{arrange()}} to use the system locale. } } \examples{ by_cyl <- mtcars \%>\% group_by(cyl) # grouping doesn't change how the data looks (apart from listing # how it's grouped): by_cyl # It changes how it acts with the other dplyr verbs: by_cyl \%>\% summarise( disp = mean(disp), hp = mean(hp) ) by_cyl \%>\% filter(disp == max(disp)) # Each call to summarise() removes a layer of grouping by_vs_am <- mtcars \%>\% group_by(vs, am) by_vs <- by_vs_am \%>\% summarise(n = n()) by_vs by_vs \%>\% summarise(n = sum(n)) # To removing grouping, use ungroup by_vs \%>\% ungroup() \%>\% summarise(n = sum(n)) # By default, group_by() overrides existing grouping by_cyl \%>\% group_by(vs, am) \%>\% group_vars() # Use add = TRUE to instead append by_cyl \%>\% group_by(vs, am, .add = TRUE) \%>\% group_vars() # You can group by expressions: this is a short-hand # for a mutate() followed by a group_by() mtcars \%>\% group_by(vsam = vs + am) # The implicit mutate() step is always performed on the # ungrouped data. Here we get 3 groups: mtcars \%>\% group_by(vs) \%>\% group_by(hp_cut = cut(hp, 3)) # If you want it to be performed by groups, # you have to use an explicit mutate() call. 
# Here we get 3 groups per value of vs mtcars \%>\% group_by(vs) \%>\% mutate(hp_cut = cut(hp, 3)) \%>\% group_by(hp_cut) # when factors are involved and .drop = FALSE, groups can be empty tbl <- tibble( x = 1:10, y = factor(rep(c("a", "c"), each = 5), levels = c("a", "b", "c")) ) tbl \%>\% group_by(y, .drop = FALSE) \%>\% group_rows() } \seealso{ Other grouping functions: \code{\link{group_map}()}, \code{\link{group_nest}()}, \code{\link{group_split}()}, \code{\link{group_trim}()} } \concept{grouping functions} dplyr/man/group_split.Rd0000644000176200001440000000432014366556340015030 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-split.R \name{group_split} \alias{group_split} \title{Split data frame by groups} \usage{ group_split(.tbl, ..., .keep = TRUE) } \arguments{ \item{.tbl}{A tbl.} \item{...}{If \code{.tbl} is an ungrouped data frame, a grouping specification, forwarded to \code{\link[=group_by]{group_by()}}.} \item{.keep}{Should the grouping columns be kept?} } \value{ A list of tibbles. Each tibble contains the rows of \code{.tbl} for the associated group and all the columns, including the grouping variables. Note that this returns a \link[vctrs:list_of]{list_of} which is slightly stricter than a simple list but is useful for representing lists where every element has the same type. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{\link[=group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but: \itemize{ \item It uses the grouping structure from \code{\link[=group_by]{group_by()}} and therefore is subject to the data mask \item It does not name the elements of the list based on the grouping as this only works well for a single character grouping variable. 
Instead, use \code{\link[=group_keys]{group_keys()}} to access a data frame that defines the groups. } \code{group_split()} is primarily designed to work with grouped data frames. You can pass \code{...} to group and split an ungrouped data frame, but this is generally not very useful as you want have easy access to the group metadata. } \section{Lifecycle}{ \code{group_split()} is not stable because you can achieve very similar results by manipulating the nested column returned from \code{\link[tidyr:nest]{tidyr::nest(.by =)}}. That also retains the group keys all within a single data structure. \code{group_split()} may be deprecated in the future. } \examples{ ir <- iris \%>\% group_by(Species) group_split(ir) group_keys(ir) } \seealso{ Other grouping functions: \code{\link{group_by}()}, \code{\link{group_map}()}, \code{\link{group_nest}()}, \code{\link{group_trim}()} } \concept{grouping functions} \keyword{internal} dplyr/man/ident.Rd0000644000176200001440000000131214303720412013542 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compat-dbplyr.R \name{ident} \alias{ident} \title{Flag a character vector as SQL identifiers} \usage{ ident(...) } \arguments{ \item{...}{A character vector, or name-value pairs} } \description{ \code{ident()} takes unquoted strings and flags them as identifiers. \code{ident_q()} assumes its input has already been quoted, and ensures it does not get quoted again. This is currently used only for \code{schema.table}. 
} \examples{ # Identifiers are escaped with " \dontshow{if (requireNamespace("dbplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} ident("x") \dontshow{\}) # examplesIf} } dplyr/man/across.Rd0000644000176200001440000002110614406402754013746 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/across.R \name{across} \alias{across} \alias{if_any} \alias{if_all} \title{Apply a function (or functions) across multiple columns} \usage{ across(.cols, .fns, ..., .names = NULL, .unpack = FALSE) if_any(.cols, .fns, ..., .names = NULL) if_all(.cols, .fns, ..., .names = NULL) } \arguments{ \item{.cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to transform. You can't select grouping columns because they are already automatically handled by the verb (i.e. \code{\link[=summarise]{summarise()}} or \code{\link[=mutate]{mutate()}}).} \item{.fns}{Functions to apply to each of the selected columns. Possible values are: \itemize{ \item A function, e.g. \code{mean}. \item A purrr-style lambda, e.g. \code{~ mean(.x, na.rm = TRUE)} \item A named list of functions or lambdas, e.g. \verb{list(mean = mean, n_miss = ~ sum(is.na(.x))}. Each function is applied to each column, and the output is named by combining the function name and the column name using the glue specification in \code{.names}. } Within these functions you can use \code{\link[=cur_column]{cur_column()}} and \code{\link[=cur_group]{cur_group()}} to access the current column and grouping keys respectively.} \item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Additional arguments for the function calls in \code{.fns} are no longer accepted in \code{...} because it's not clear when they should be evaluated: once per \code{across()} or once per group? 
Instead supply additional arguments directly in \code{.fns} by using a lambda. For example, instead of \code{across(a:b, mean, na.rm = TRUE)} write \code{across(a:b, ~ mean(.x, na.rm = TRUE))}.} \item{.names}{A glue specification that describes how to name the output columns. This can use \code{{.col}} to stand for the selected column name, and \code{{.fn}} to stand for the name of the function being applied. The default (\code{NULL}) is equivalent to \code{"{.col}"} for the single function case and \code{"{.col}_{.fn}"} for the case where a list is used for \code{.fns}.} \item{.unpack}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Optionally \link[tidyr:pack]{unpack} data frames returned by functions in \code{.fns}, which expands the df-columns out into individual columns, retaining the number of rows in the data frame. \itemize{ \item If \code{FALSE}, the default, no unpacking is done. \item If \code{TRUE}, unpacking is done with a default glue specification of \code{"{outer}_{inner}"}. \item Otherwise, a single glue specification can be supplied to describe how to name the unpacked columns. This can use \code{{outer}} to refer to the name originally generated by \code{.names}, and \code{{inner}} to refer to the names of the data frame you are unpacking. }} } \value{ \code{across()} typically returns a tibble with one column for each column in \code{.cols} and each function in \code{.fns}. If \code{.unpack} is used, more columns may be returned depending on how the results of \code{.fns} are unpacked. \code{if_any()} and \code{if_all()} return a logical vector. } \description{ \code{across()} makes it easy to apply the same transformation to multiple columns, allowing you to use \code{\link[=select]{select()}} semantics inside "data-masking" functions like \code{\link[=summarise]{summarise()}} and \code{\link[=mutate]{mutate()}}. 
See \code{vignette("colwise")} for more details. \code{if_any()} and \code{if_all()} apply the same predicate function to a selection of columns and combine the results into a single logical vector: \code{if_any()} is \code{TRUE} when the predicate is \code{TRUE} for \emph{any} of the selected columns, \code{if_all()} is \code{TRUE} when the predicate is \code{TRUE} for \emph{all} selected columns. If you just need to select columns without applying a transformation to each of them, then you probably want to use \code{\link[=pick]{pick()}} instead. \code{across()} supersedes the family of "scoped variants" like \code{summarise_at()}, \code{summarise_if()}, and \code{summarise_all()}. } \section{Timing of evaluation}{ R code in dplyr verbs is generally evaluated once per group. Inside \code{across()} however, code is evaluated once for each combination of columns and groups. If the evaluation timing is important, for example if you're generating random variables, think about when it should happen and place your code in consequence. \if{html}{\out{
}}\preformatted{gdf <- tibble(g = c(1, 1, 2, 3), v1 = 10:13, v2 = 20:23) \%>\% group_by(g) set.seed(1) # Outside: 1 normal variate n <- rnorm(1) gdf \%>\% mutate(across(v1:v2, ~ .x + n)) #> # A tibble: 4 x 3 #> # Groups: g [3] #> g v1 v2 #> #> 1 1 9.37 19.4 #> 2 1 10.4 20.4 #> 3 2 11.4 21.4 #> 4 3 12.4 22.4 # Inside a verb: 3 normal variates (ngroup) gdf \%>\% mutate(n = rnorm(1), across(v1:v2, ~ .x + n)) #> # A tibble: 4 x 4 #> # Groups: g [3] #> g v1 v2 n #> #> 1 1 10.2 20.2 0.184 #> 2 1 11.2 21.2 0.184 #> 3 2 11.2 21.2 -0.836 #> 4 3 14.6 24.6 1.60 # Inside `across()`: 6 normal variates (ncol * ngroup) gdf \%>\% mutate(across(v1:v2, ~ .x + rnorm(1))) #> # A tibble: 4 x 3 #> # Groups: g [3] #> g v1 v2 #> #> 1 1 10.3 20.7 #> 2 1 11.3 21.7 #> 3 2 11.2 22.6 #> 4 3 13.5 22.7 }\if{html}{\out{
}} } \examples{ # For better printing iris <- as_tibble(iris) # across() ----------------------------------------------------------------- # Different ways to select the same set of columns # See for details iris \%>\% mutate(across(c(Sepal.Length, Sepal.Width), round)) iris \%>\% mutate(across(c(1, 2), round)) iris \%>\% mutate(across(1:Sepal.Width, round)) iris \%>\% mutate(across(where(is.double) & !c(Petal.Length, Petal.Width), round)) # Using an external vector of names cols <- c("Sepal.Length", "Petal.Width") iris \%>\% mutate(across(all_of(cols), round)) # If the external vector is named, the output columns will be named according # to those names names(cols) <- tolower(cols) iris \%>\% mutate(across(all_of(cols), round)) # A purrr-style formula iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), ~ mean(.x, na.rm = TRUE))) # A named list of functions iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd))) # Use the .names argument to control the output names iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), mean, .names = "mean_{.col}")) iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd), .names = "{.col}.{.fn}")) # If a named external vector is used for column selection, .names will use # those names when constructing the output names iris \%>\% group_by(Species) \%>\% summarise(across(all_of(cols), mean, .names = "mean_{.col}")) # When the list is not named, .fn is replaced by the function's position iris \%>\% group_by(Species) \%>\% summarise(across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}")) # When the functions in .fns return a data frame, you typically get a # "packed" data frame back quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble(quantile = probs, value = quantile(x, probs)) } iris \%>\% reframe(across(starts_with("Sepal"), quantile_df)) # Use .unpack to automatically 
expand these packed data frames into their # individual columns iris \%>\% reframe(across(starts_with("Sepal"), quantile_df, .unpack = TRUE)) # .unpack can utilize a glue specification if you don't like the defaults iris \%>\% reframe(across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}")) # This is also useful inside mutate(), for example, with a multi-lag helper multilag <- function(x, lags = 1:3) { names(lags) <- as.character(lags) purrr::map_dfr(lags, lag, x = x) } iris \%>\% group_by(Species) \%>\% mutate(across(starts_with("Sepal"), multilag, .unpack = TRUE)) \%>\% select(Species, starts_with("Sepal")) # if_any() and if_all() ---------------------------------------------------- iris \%>\% filter(if_any(ends_with("Width"), ~ . > 4)) iris \%>\% filter(if_all(ends_with("Width"), ~ . > 2)) } \seealso{ \code{\link[=c_across]{c_across()}} for a function that returns a vector } dplyr/man/filter.Rd0000644000176200001440000001306714406402754013750 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/filter.R \name{filter} \alias{filter} \title{Keep rows that match a condition} \usage{ filter(.data, ..., .by = NULL, .preserve = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that return a logical value, and are defined in terms of the variables in \code{.data}. If multiple expressions are included, they are combined with the \code{&} operator. 
Only rows for which all conditions evaluate to \code{TRUE} are kept.} \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to group by for just this operation, functioning as an alternative to \code{\link[=group_by]{group_by()}}. For details and examples, see \link[=dplyr_by]{?dplyr_by}.} \item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are a subset of the input, but appear in the same order. \item Columns are not modified. \item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}). \item Data frame attributes are preserved. } } \description{ The \code{filter()} function is used to subset a data frame, retaining all rows that satisfy your conditions. To be retained, the row must produce a value of \code{TRUE} for all conditions. Note that when a condition evaluates to \code{NA} the row will be dropped, unlike base subsetting with \code{[}. } \details{ The \code{filter()} function is used to subset the rows of \code{.data}, applying the expressions in \code{...} to the column values to determine which rows should be retained. It can be applied to both grouped and ungrouped data (see \code{\link[=group_by]{group_by()}} and \code{\link[=ungroup]{ungroup()}}). However, dplyr is not yet smart enough to optimise the filtering operation on grouped datasets that do not need grouped calculations. For this reason, filtering is often considerably faster on ungrouped data. 
} \section{Useful filter functions}{ There are many functions and operators that are useful when constructing the expressions used to filter the data: \itemize{ \item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc \item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}} \item \code{\link[=is.na]{is.na()}} \item \code{\link[=between]{between()}}, \code{\link[=near]{near()}} } } \section{Grouped tibbles}{ Because filtering expressions are computed within groups, they may yield different results on grouped tibbles. This will be the case as soon as an aggregating, lagging, or ranking function is involved. Compare this ungrouped filtering: \if{html}{\out{
}}\preformatted{starwars \%>\% filter(mass > mean(mass, na.rm = TRUE)) }\if{html}{\out{
}} With the grouped equivalent: \if{html}{\out{
}}\preformatted{starwars \%>\% group_by(gender) \%>\% filter(mass > mean(mass, na.rm = TRUE)) }\if{html}{\out{
}} In the ungrouped version, \code{filter()} compares the value of \code{mass} in each row to the global average (taken over the whole data set), keeping only the rows with \code{mass} greater than this global average. In contrast, the grouped version calculates the average mass separately for each \code{gender} group, and keeps rows with \code{mass} greater than the relevant within-gender average. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}. } \examples{ # Filtering by one criterion filter(starwars, species == "Human") filter(starwars, mass > 1000) # Filtering by multiple criteria within a single logical expression filter(starwars, hair_color == "none" & eye_color == "black") filter(starwars, hair_color == "none" | eye_color == "black") # When multiple expressions are used, they are combined using & filter(starwars, hair_color == "none", eye_color == "black") # The filtering operation may yield different results on grouped # tibbles because the expressions are computed within groups. 
# # The following filters rows where `mass` is greater than the # global average: starwars \%>\% filter(mass > mean(mass, na.rm = TRUE)) # Whereas this keeps rows with `mass` greater than the gender # average: starwars \%>\% group_by(gender) \%>\% filter(mass > mean(mass, na.rm = TRUE)) # To refer to column names that are stored as strings, use the `.data` pronoun: vars <- c("mass", "height") cond <- c(80, 150) starwars \%>\% filter( .data[[vars[[1]]]] > cond[[1]], .data[[vars[[2]]]] > cond[[2]] ) # Learn more in ?rlang::args_data_masking } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{mutate}()}, \code{\link{reframe}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/between.Rd0000644000176200001440000000166714366556340014125 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.R \name{between} \alias{between} \title{Detect where values fall in a specified range} \usage{ between(x, left, right) } \arguments{ \item{x}{A vector} \item{left, right}{Boundary values. Both \code{left} and \code{right} are recycled to the size of \code{x}.} } \value{ A logical vector the same size as \code{x}. } \description{ This is a shortcut for \code{x >= left & x <= right}, implemented for local vectors and translated to the appropriate SQL for remote tables. } \details{ \code{x}, \code{left}, and \code{right} are all cast to their common type before the comparison is made. } \examples{ between(1:12, 7, 9) x <- rnorm(1e2) x[between(x, -1, 1)] # On a tibble using `filter()` filter(starwars, between(height, 100, 150)) } \seealso{ \code{\link[=join_by]{join_by()}} if you are looking for documentation for the \code{between()} overlap join helper. 
} dplyr/man/relocate.Rd0000644000176200001440000000443014366556340014261 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/relocate.R \name{relocate} \alias{relocate} \title{Change column order} \usage{ relocate(.data, ..., .before = NULL, .after = NULL) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to move.} \item{.before, .after}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Destination of columns selected by \code{...}. Supplying neither will move columns to the left-hand side; specifying both is an error.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item The same columns appear in the output, but (usually) in a different place and possibly renamed. \item Data frame attributes are preserved. \item Groups are not affected. } } \description{ Use \code{relocate()} to change column positions, using the same syntax as \code{select()} to make it easy to move blocks of columns at once. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("relocate")}. 
} \examples{ df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a") df \%>\% relocate(f) df \%>\% relocate(a, .after = c) df \%>\% relocate(f, .before = b) df \%>\% relocate(a, .after = last_col()) # relocated columns can change name df \%>\% relocate(ff = f) # Can also select variables based on their type df \%>\% relocate(where(is.character)) df \%>\% relocate(where(is.numeric), .after = last_col()) # Or with any other select helper df \%>\% relocate(any_of(c("a", "e", "i", "o", "u"))) # When .before or .after refers to multiple variables they will be # moved to be immediately before/after the selected variables. df2 <- tibble(a = 1, b = "a", c = 1, d = "a") df2 \%>\% relocate(where(is.numeric), .after = where(is.character)) df2 \%>\% relocate(where(is.numeric), .before = where(is.character)) } dplyr/man/group_cols.Rd0000644000176200001440000000172013663216626014635 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/select-helpers.R \name{group_cols} \alias{group_cols} \title{Select grouping variables} \usage{ group_cols(vars = NULL, data = NULL) } \arguments{ \item{vars}{Deprecated; please use data instead.} \item{data}{For advanced use only. The default \code{NULL} automatically finds the "current" data frames.} } \description{ This selection helper matches grouping variables. It can be used in \code{\link[=select]{select()}} or \code{\link[=vars]{vars()}} selections. } \examples{ gdf <- iris \%>\% group_by(Species) gdf \%>\% select(group_cols()) # Remove the grouping variables from mutate selections: gdf \%>\% mutate_at(vars(-group_cols()), `/`, 100) # -> No longer necessary with across() gdf \%>\% mutate(across(everything(), ~ . / 100)) } \seealso{ \code{\link[=groups]{groups()}} and \code{\link[=group_vars]{group_vars()}} for retrieving the grouping variables outside selection contexts. 
} dplyr/man/compute.Rd0000644000176200001440000000412014366556340014133 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compute-collect.R \name{compute} \alias{compute} \alias{collect} \alias{collapse} \title{Force computation of a database query} \usage{ compute(x, ...) collect(x, ...) collapse(x, ...) } \arguments{ \item{x}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{Arguments passed on to methods} } \description{ \code{compute()} stores results in a remote temporary table. \code{collect()} retrieves data into a local tibble. \code{collapse()} is slightly different: it doesn't force computation, but instead forces generation of the SQL query. This is sometimes needed to work around bugs in dplyr's SQL generation. All functions preserve grouping and ordering. } \section{Methods}{ These functions are \strong{generics}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. 
Methods available in currently loaded packages: \itemize{ \item \code{compute()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("compute")} \item \code{collect()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collect")} \item \code{collapse()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collapse")} } } \examples{ \dontshow{if (requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} mtcars2 <- dbplyr::src_memdb() \%>\% copy_to(mtcars, name = "mtcars2-cc", overwrite = TRUE) remote <- mtcars2 \%>\% filter(cyl == 8) \%>\% select(mpg:drat) # Compute query and save in remote table compute(remote) # Compute query bring back to this session collect(remote) # Creates a fresh query based on the generated SQL collapse(remote) \dontshow{\}) # examplesIf} } \seealso{ \code{\link[=copy_to]{copy_to()}}, the opposite of \code{collect()}: it takes a local data frame and uploads it to the remote source. } dplyr/man/case_when.Rd0000644000176200001440000001220214472225345014407 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/case-when.R \name{case_when} \alias{case_when} \title{A general vectorised if-else} \usage{ case_when(..., .default = NULL, .ptype = NULL, .size = NULL) } \arguments{ \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A sequence of two-sided formulas. The left hand side (LHS) determines which values match this case. The right hand side (RHS) provides the replacement value. The LHS inputs must evaluate to logical vectors. The RHS inputs will be coerced to their common type. All inputs will be recycled to their common size. That said, we encourage all LHS inputs to be the same size. Recycling is mainly useful for RHS inputs, where you might supply a size 1 input that will be recycled to the size of the LHS inputs. 
\code{NULL} inputs are ignored.} \item{.default}{The value used when all of the LHS inputs return either \code{FALSE} or \code{NA}. \code{.default} must be size 1 or the same size as the common size computed from \code{...}. \code{.default} participates in the computation of the common type with the RHS inputs. \code{NA} values in the LHS conditions are treated like \code{FALSE}, meaning that the result at those locations will be assigned the \code{.default} value. To handle missing values in the conditions differently, you must explicitly catch them with another condition before they fall through to the \code{.default}. This typically involves some variation of \code{is.na(x) ~ value} tailored to your usage of \code{case_when()}. If \code{NULL}, the default, a missing value will be used.} \item{.ptype}{An optional prototype declaring the desired output type. If supplied, this overrides the common type of the RHS inputs.} \item{.size}{An optional size declaring the desired output size. If supplied, this overrides the common size computed from \code{...}.} } \value{ A vector with the same size as the common size computed from the inputs in \code{...} and the same type as the common type of the RHS inputs in \code{...}. } \description{ This function allows you to vectorise multiple \code{\link[=if_else]{if_else()}} statements. Each case is evaluated sequentially and the first match for each element determines the corresponding value in the output vector. If no cases match, the \code{.default} is used as a final "else" statement. \code{case_when()} is an R equivalent of the SQL "searched" \verb{CASE WHEN} statement. } \examples{ x <- 1:70 case_when( x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", .default = as.character(x) ) # Like an if statement, the arguments are evaluated in order, so you must # proceed from the most specific to the most general. 
This won't work: case_when( x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", x \%\% 35 == 0 ~ "fizz buzz", .default = as.character(x) ) # If none of the cases match and no `.default` is supplied, NA is used: case_when( x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", ) # Note that `NA` values on the LHS are treated like `FALSE` and will be # assigned the `.default` value. You must handle them explicitly if you # want to use a different value. The exact way to handle missing values is # dependent on the set of LHS conditions you use. x[2:4] <- NA_real_ case_when( x \%\% 35 == 0 ~ "fizz buzz", x \%\% 5 == 0 ~ "fizz", x \%\% 7 == 0 ~ "buzz", is.na(x) ~ "nope", .default = as.character(x) ) # `case_when()` evaluates all RHS expressions, and then constructs its # result by extracting the selected (via the LHS expressions) parts. # In particular `NaN`s are produced in this case: y <- seq(-2, 2, by = .5) case_when( y >= 0 ~ sqrt(y), .default = y ) # `case_when()` is particularly useful inside `mutate()` when you want to # create a new variable that relies on a complex combination of existing # variables starwars \%>\% select(name:mass, gender, species) \%>\% mutate( type = case_when( height > 200 | mass > 200 ~ "large", species == "Droid" ~ "robot", .default = "other" ) ) # `case_when()` is not a tidy eval function. If you'd like to reuse # the same patterns, extract the `case_when()` call in a normal # function: case_character_type <- function(height, mass, species) { case_when( height > 200 | mass > 200 ~ "large", species == "Droid" ~ "robot", .default = "other" ) } case_character_type(150, 250, "Droid") case_character_type(150, 150, "Droid") # Such functions can be used inside `mutate()` as well: starwars \%>\% mutate(type = case_character_type(height, mass, species)) \%>\% pull(type) # `case_when()` ignores `NULL` inputs. This is useful when you'd # like to use a pattern only under certain conditions. 
Here we'll # take advantage of the fact that `if` returns `NULL` when there is # no `else` clause: case_character_type <- function(height, mass, species, robots = TRUE) { case_when( height > 200 | mass > 200 ~ "large", if (robots) species == "Droid" ~ "robot", .default = "other" ) } starwars \%>\% mutate(type = case_character_type(height, mass, species, robots = FALSE)) \%>\% pull(type) } \seealso{ \code{\link[=case_match]{case_match()}} } dplyr/man/backend_dbplyr.Rd0000644000176200001440000000742113663216626015430 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dbplyr.R \name{backend_dbplyr} \alias{backend_dbplyr} \alias{db_desc} \alias{sql_translate_env} \alias{db_list_tables} \alias{db_has_table} \alias{db_data_type} \alias{db_save_query} \alias{db_begin} \alias{db_commit} \alias{db_rollback} \alias{db_write_table} \alias{db_create_table} \alias{db_insert_into} \alias{db_create_indexes} \alias{db_create_index} \alias{db_drop_table} \alias{db_analyze} \alias{db_explain} \alias{db_query_fields} \alias{db_query_rows} \alias{sql_select} \alias{sql_subquery} \alias{sql_join} \alias{sql_semi_join} \alias{sql_set_op} \alias{sql_escape_string} \alias{sql_escape_ident} \title{Database and SQL generics.} \usage{ db_desc(x) sql_translate_env(con) db_list_tables(con) db_has_table(con, table) db_data_type(con, fields) db_save_query(con, sql, name, temporary = TRUE, ...) db_begin(con, ...) db_commit(con, ...) db_rollback(con, ...) db_write_table(con, table, types, values, temporary = FALSE, ...) db_create_table(con, table, types, temporary = FALSE, ...) db_insert_into(con, table, values, ...) db_create_indexes(con, table, indexes = NULL, unique = FALSE, ...) db_create_index(con, table, columns, name = NULL, unique = FALSE, ...) db_drop_table(con, table, force = FALSE, ...) db_analyze(con, table, ...) db_explain(con, sql, ...) db_query_fields(con, sql, ...) db_query_rows(con, sql, ...) 
sql_select( con, select, from, where = NULL, group_by = NULL, having = NULL, order_by = NULL, limit = NULL, distinct = FALSE, ... ) sql_subquery(con, from, name = random_table_name(), ...) sql_join(con, x, y, vars, type = "inner", by = NULL, ...) sql_semi_join(con, x, y, anti = FALSE, by = NULL, ...) sql_set_op(con, x, y, method) sql_escape_string(con, x) sql_escape_ident(con, x) } \arguments{ \item{con}{A database connection.} \item{table}{A string, the table name.} \item{fields}{A list of fields, as in a data frame.} } \value{ Usually a logical value indicating success. Most failures should generate an error. However, \code{db_has_table()} should return \code{NA} if temporary tables cannot be listed with \code{\link[DBI:dbListTables]{DBI::dbListTables()}} (due to backend API limitations for example). As a result, your methods will rely on the backend to throw an error if a table exists when it shouldn't. } \description{ The \code{sql_} generics are used to build the different types of SQL queries. The default implementations in dbplyr generate ANSI 92 compliant SQL. The \code{db_} generics execute actions on the database. The default implementations in dbplyr typically just call the standard DBI S4 method. } \details{ A few backend methods do not call the standard DBI S4 methods including \itemize{ \item \code{db_data_type()}: Calls \code{\link[DBI:dbDataType]{DBI::dbDataType()}} for every field (e.g. data frame column) and returns a vector of corresponding SQL data types \item \code{db_save_query()}: Builds and executes a \verb{CREATE [TEMPORARY] TABLE ...} SQL command. \item \code{db_create_index()}: Builds and executes a \verb{CREATE INDEX ON
} SQL command. \item \code{db_drop_table()}: Builds and executes a \verb{DROP TABLE [IF EXISTS]
} SQL command. \item \code{db_analyze()}: Builds and executes an \verb{ANALYZE
} SQL command. } Currently, \code{\link[=copy_to]{copy_to()}} is the only user of \code{db_begin()}, \code{db_commit()}, \code{db_rollback()}, \code{db_write_table()}, \code{db_create_indexes()}, \code{db_drop_table()} and \code{db_analyze()}. If you find yourself overriding many of these functions it may suggest that you should just override \code{copy_to()} instead. \code{db_create_table()} and \code{db_insert_into()} have been deprecated in favour of \code{db_write_table()}. } \keyword{internal} dplyr/man/with_groups.Rd0000644000176200001440000000242614366556340015040 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/groups-with.R \name{with_groups} \alias{with_groups} \title{Perform an operation with temporary groups} \usage{ with_groups(.data, .groups, .f, ...) } \arguments{ \item{.data}{A data frame} \item{.groups}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> One or more variables to group by. Unlike \code{\link[=group_by]{group_by()}}, you can only group by existing variables, and you can use tidy-select syntax like \code{c(x, y, z)} to select multiple variables. Use \code{NULL} to temporarily \strong{un}group.} \item{.f}{Function to apply to regrouped data. Supports purrr-style \code{~} syntax} \item{...}{Additional arguments passed on to \code{...}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} This was an experimental function that allows you to modify the grouping variables for a single operation; it is superseded in favour of using the \code{.by} argument to individual verbs. 
} \examples{ df <- tibble(g = c(1, 1, 2, 2, 3), x = runif(5)) # Old df \%>\% with_groups(g, mutate, x_mean = mean(x)) # New df \%>\% mutate(x_mean = mean(x), .by = g) } \keyword{internal} dplyr/man/copy_to.Rd0000644000176200001440000000263014366556340014137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/copy-to.R \name{copy_to} \alias{copy_to} \title{Copy a local data frame to a remote src} \usage{ copy_to(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...) } \arguments{ \item{dest}{remote data source} \item{df}{local data frame} \item{name}{name for new remote table.} \item{overwrite}{If \code{TRUE}, will overwrite an existing table with name \code{name}. If \code{FALSE}, will throw an error if \code{name} already exists.} \item{...}{other parameters passed to methods.} } \value{ a \code{tbl} object in the remote source } \description{ This function uploads a local data frame into a remote data source, creating the table definition as needed. Wherever possible, the new object will be temporary, limited to the current connection to the source. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("copy_to")}. } \examples{ \dontrun{ iris2 <- dbplyr::src_memdb() \%>\% copy_to(iris, overwrite = TRUE) iris2 } } \seealso{ \code{\link[=collect]{collect()}} for the opposite action; downloading remote data into a local dbl. 
} dplyr/man/tbl_ptype.Rd0000644000176200001440000000071513663216626014466 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{tbl_ptype} \alias{tbl_ptype} \title{Return a prototype of a tbl} \usage{ tbl_ptype(.data) } \description{ Used in \verb{_if} functions to enable type-based selection even when the data is lazily generated. Should either return the complete tibble, or if that can not be computed quickly, a 0-row tibble where the columns are of the correct type. } \keyword{internal} dplyr/man/combine.Rd0000644000176200001440000000154214266276767014103 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-combine.R \name{combine} \alias{combine} \title{Combine vectors} \usage{ combine(...) } \arguments{ \item{...}{Vectors to combine.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{combine()} is deprecated in favour of \code{\link[vctrs:vec_c]{vctrs::vec_c()}}. \code{combine()} attempted to automatically guess whether you wanted \code{\link[=c]{c()}} or \code{\link[=unlist]{unlist()}}, but could fail in surprising ways. We now believe it's better to be explicit. } \examples{ f1 <- factor("a") f2 <- factor("b") combine(f1, f2) # -> vctrs::vec_c(f1, f2) combine(list(f1, f2)) # -> vctrs::vec_c(!!!list(f1, f2)) } \keyword{internal} dplyr/man/all_equal.Rd0000644000176200001440000000316414366556340014424 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/all-equal.R \name{all_equal} \alias{all_equal} \title{Flexible equality comparison for data frames} \usage{ all_equal( target, current, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE, ... 
) } \arguments{ \item{target, current}{Two data frames to compare.} \item{ignore_col_order}{Should order of columns be ignored?} \item{ignore_row_order}{Should order of rows be ignored?} \item{convert}{Should similar classes be converted? Currently this will convert factor to character and integer to double.} \item{...}{Ignored. Needed for compatibility with \code{all.equal()}.} } \value{ \code{TRUE} if equal, otherwise a character vector describing the reasons why they're not equal. Use \code{\link[=isTRUE]{isTRUE()}} if using the result in an \code{if} expression. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} \code{all_equal()} allows you to compare data frames, optionally ignoring row and column names. It is deprecated as of dplyr 1.1.0, because it makes it too easy to ignore important differences. } \examples{ scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))] # `all_equal()` ignored row and column ordering by default, # but we now feel that that makes it too easy to make mistakes mtcars2 <- scramble(mtcars) all_equal(mtcars, mtcars2) # Instead, be explicit about the row and column ordering all.equal( mtcars, mtcars2[rownames(mtcars), names(mtcars)] ) } \keyword{internal} dplyr/man/join_by.Rd0000644000176200001440000002233614366556340014121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join-by.R \name{join_by} \alias{join_by} \alias{closest} \alias{overlaps} \alias{within} \title{Join specifications} \usage{ join_by(...) } \arguments{ \item{...}{Expressions specifying the join. 
Each expression should consist of one of the following: \itemize{ \item Equality condition: \code{==} \item Inequality conditions: \code{>=}, \code{>}, \code{<=}, or \code{<} \item Rolling helper: \code{closest()} \item Overlap helpers: \code{between()}, \code{within()}, or \code{overlaps()} } Other expressions are not supported. If you need to perform a join on a computed variable, e.g. \code{join_by(sales_date - 40 >= promo_date)}, you'll need to precompute and store it in a separate column. Column names should be specified as quoted or unquoted names. By default, the name on the left-hand side of a join condition refers to the left-hand table, unless overridden by explicitly prefixing the column name with either \verb{x$} or \verb{y$}. If a single column name is provided without any join conditions, it is interpreted as if that column name was duplicated on each side of \code{==}, i.e. \code{x} is interpreted as \code{x == x}.} } \description{ \code{join_by()} constructs a specification that describes how to join two tables using a small domain specific language. The result can be supplied as the \code{by} argument to any of the join functions (such as \code{\link[=left_join]{left_join()}}). } \section{Join types}{ The following types of joins are supported by dplyr: \itemize{ \item Equality joins \item Inequality joins \item Rolling joins \item Overlap joins \item Cross joins } Equality, inequality, rolling, and overlap joins are discussed in more detail below. Cross joins are implemented through \code{\link[=cross_join]{cross_join()}}. \subsection{Equality joins}{ Equality joins require keys to be equal between one or more pairs of columns, and are the most common type of join. To construct an equality join using \code{join_by()}, supply two column names to join with separated by \code{==}. Alternatively, supplying a single name will be interpreted as an equality join between two columns of the same name. 
For example, \code{join_by(x)} is equivalent to \code{join_by(x == x)}. } \subsection{Inequality joins}{ Inequality joins match on an inequality, such as \code{>}, \code{>=}, \code{<}, or \code{<=}, and are common in time series analysis and genomics. To construct an inequality join using \code{join_by()}, supply two column names separated by one of the above mentioned inequalities. Note that inequality joins will match a single row in \code{x} to a potentially large number of rows in \code{y}. Be extra careful when constructing inequality join specifications! } \subsection{Rolling joins}{ Rolling joins are a variant of inequality joins that limit the results returned from an inequality join condition. They are useful for "rolling" the closest match forward/backwards when there isn't an exact match. To construct a rolling join, wrap an inequality with \code{closest()}. \itemize{ \item \code{closest(expr)} \code{expr} must be an inequality involving one of: \code{>}, \code{>=}, \code{<}, or \code{<=}. For example, \code{closest(x >= y)} is interpreted as: For each value in \code{x}, find the closest value in \code{y} that is less than or equal to that \code{x} value. } \code{closest()} will always use the left-hand table (\code{x}) as the primary table, and the right-hand table (\code{y}) as the one to find the closest match in, regardless of how the inequality is specified. For example, \code{closest(y$a >= x$b)} will always be interpreted as \code{closest(x$b <= y$a)}. } \subsection{Overlap joins}{ Overlap joins are a special case of inequality joins involving one or two columns from the left-hand table \emph{overlapping} a range defined by two columns from the right-hand table. There are three helpers that \code{join_by()} recognizes to assist with constructing overlap joins, all of which can be constructed from simpler inequalities. 
\itemize{ \item \code{between(x, y_lower, y_upper, ..., bounds = "[]")} For each value in \code{x}, this finds everywhere that value falls between \verb{[y_lower, y_upper]}. Equivalent to \verb{x >= y_lower, x <= y_upper} by default. \code{bounds} can be one of \code{"[]"}, \code{"[)"}, \code{"(]"}, or \code{"()"} to alter the inclusiveness of the lower and upper bounds. This changes whether \code{>=} or \code{>} and \code{<=} or \code{<} are used to build the inequalities shown above. Dots are for future extensions and must be empty. \item \code{within(x_lower, x_upper, y_lower, y_upper)} For each range in \verb{[x_lower, x_upper]}, this finds everywhere that range falls completely within \verb{[y_lower, y_upper]}. Equivalent to \verb{x_lower >= y_lower, x_upper <= y_upper}. The inequalities used to build \code{within()} are the same regardless of the inclusiveness of the supplied ranges. \item \code{overlaps(x_lower, x_upper, y_lower, y_upper, ..., bounds = "[]")} For each range in \verb{[x_lower, x_upper]}, this finds everywhere that range overlaps \verb{[y_lower, y_upper]} in any capacity. Equivalent to \verb{x_lower <= y_upper, x_upper >= y_lower} by default. \code{bounds} can be one of \code{"[]"}, \code{"[)"}, \code{"(]"}, or \code{"()"} to alter the inclusiveness of the lower and upper bounds. \code{"[]"} uses \code{<=} and \code{>=}, but the 3 other options use \code{<} and \code{>} and generate the exact same inequalities. Dots are for future extensions and must be empty. } These conditions assume that the ranges are well-formed and non-empty, i.e. \code{x_lower <= x_upper} when bounds are treated as \code{"[]"}, and \code{x_lower < x_upper} otherwise. } } \section{Column referencing}{ When specifying join conditions, \code{join_by()} assumes that column names on the left-hand side of the condition refer to the left-hand table (\code{x}), and names on the right-hand side of the condition refer to the right-hand table (\code{y}). 
Occasionally, it is clearer to be able to specify a right-hand table name on the left-hand side of the condition, and vice versa. To support this, column names can be prefixed by \verb{x$} or \verb{y$} to explicitly specify which table they come from. } \examples{ sales <- tibble( id = c(1L, 1L, 1L, 2L, 2L), sale_date = as.Date(c("2018-12-31", "2019-01-02", "2019-01-05", "2019-01-04", "2019-01-01")) ) sales promos <- tibble( id = c(1L, 1L, 2L), promo_date = as.Date(c("2019-01-01", "2019-01-05", "2019-01-02")) ) promos # Match `id` to `id`, and `sale_date` to `promo_date` by <- join_by(id, sale_date == promo_date) left_join(sales, promos, by) # For each `sale_date` within a particular `id`, # find all `promo_date`s that occurred before that particular sale by <- join_by(id, sale_date >= promo_date) left_join(sales, promos, by) # For each `sale_date` within a particular `id`, # find only the closest `promo_date` that occurred before that sale by <- join_by(id, closest(sale_date >= promo_date)) left_join(sales, promos, by) # If you want to disallow exact matching in rolling joins, use `>` rather # than `>=`. Note that the promo on `2019-01-05` is no longer considered the # closest match for the sale on the same date. by <- join_by(id, closest(sale_date > promo_date)) left_join(sales, promos, by) # Same as before, but also require that the promo had to occur at most 1 # day before the sale was made. We'll use a full join to see that id 2's # promo on `2019-01-02` is no longer matched to the sale on `2019-01-04`. 
sales <- mutate(sales, sale_date_lower = sale_date - 1) by <- join_by(id, closest(sale_date >= promo_date), sale_date_lower <= promo_date) full_join(sales, promos, by) # --------------------------------------------------------------------------- segments <- tibble( segment_id = 1:4, chromosome = c("chr1", "chr2", "chr2", "chr1"), start = c(140, 210, 380, 230), end = c(150, 240, 415, 280) ) segments reference <- tibble( reference_id = 1:4, chromosome = c("chr1", "chr1", "chr2", "chr2"), start = c(100, 200, 300, 415), end = c(150, 250, 399, 450) ) reference # Find every time a segment `start` falls between the reference # `[start, end]` range. by <- join_by(chromosome, between(start, start, end)) full_join(segments, reference, by) # If you wanted the reference columns first, supply `reference` as `x` # and `segments` as `y`, then explicitly refer to their columns using `x$` # and `y$`. by <- join_by(chromosome, between(y$start, x$start, x$end)) full_join(reference, segments, by) # Find every time a segment falls completely within a reference. # Sometimes using `x$` and `y$` makes your intentions clearer, even if they # match the default behavior. by <- join_by(chromosome, within(x$start, x$end, y$start, y$end)) inner_join(segments, reference, by) # Find every time a segment overlaps a reference in any way. by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end)) full_join(segments, reference, by) # It is common to have right-open ranges with bounds like `[)`, which would # mean an end value of `415` would no longer overlap a start value of `415`. # Setting `bounds` allows you to compute overlaps with those kinds of ranges. 
by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end, bounds = "[)")) full_join(segments, reference, by) } dplyr/man/summarise_all.Rd0000644000176200001440000001334614366556340015326 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise-mutate.R \name{summarise_all} \alias{summarise_all} \alias{summarise_if} \alias{summarise_at} \alias{summarize_all} \alias{summarize_if} \alias{summarize_at} \title{Summarise multiple columns} \usage{ summarise_all(.tbl, .funs, ...) summarise_if(.tbl, .predicate, .funs, ...) summarise_at(.tbl, .vars, .funs, ..., .cols = NULL) summarize_all(.tbl, .funs, ...) summarize_if(.tbl, .predicate, .funs, ...) summarize_at(.tbl, .vars, .funs, ..., .cols = NULL) } \arguments{ \item{.tbl}{A \code{tbl} object.} \item{.funs}{A function \code{fun}, a quosure style lambda \code{~ fun(.)} or a list of either form.} \item{...}{Additional arguments for the function calls in \code{.funs}. These are evaluated only once, with \link[rlang:dyn-dots]{tidy dots} support.} \item{.predicate}{A predicate function to be applied to the columns or a logical vector. The variables for which \code{.predicate} is or returns \code{TRUE} are selected. This argument is passed to \code{\link[rlang:as_function]{rlang::as_function()}} and thus supports quosure-style lambda functions and strings representing function names.} \item{.vars}{A list of columns generated by \code{\link[=vars]{vars()}}, a character vector of column names, a numeric vector of column positions, or \code{NULL}.} \item{.cols}{This argument has been renamed to \code{.vars} to fit dplyr's terminology and is deprecated.} } \value{ A data frame. By default, the newly created columns have the shortest names needed to uniquely identify the output. To force inclusion of a name, even when not needed, name the input (see examples for details). 
} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} Scoped verbs (\verb{_if}, \verb{_at}, \verb{_all}) have been superseded by the use of \code{\link[=pick]{pick()}} or \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. The \link{scoped} variants of \code{\link[=summarise]{summarise()}} make it easy to apply the same transformation to multiple variables. There are three variants. \itemize{ \item \code{summarise_all()} affects every variable \item \code{summarise_at()} affects variables selected with a character vector or vars() \item \code{summarise_if()} affects variables selected with a predicate function } } \section{Grouping variables}{ If applied on a grouped tibble, these operations are \emph{not} applied to the grouping variables. The behaviour depends on whether the selection is \strong{implicit} (\code{all} and \code{if} selections) or \strong{explicit} (\code{at} selections). \itemize{ \item Grouping variables covered by explicit selections in \code{summarise_at()} are always an error. Add \code{-group_cols()} to the \code{\link[=vars]{vars()}} selection to avoid this: \if{html}{\out{
}}\preformatted{data \%>\% summarise_at(vars(-group_cols(), ...), myoperation) }\if{html}{\out{
}} Or remove \code{group_vars()} from the character vector of column names: \if{html}{\out{
}}\preformatted{nms <- setdiff(nms, group_vars(data)) data \%>\% summarise_at(nms, myoperation) }\if{html}{\out{
}} \item Grouping variables covered by implicit selections are silently ignored by \code{summarise_all()} and \code{summarise_if()}. } } \section{Naming}{ The names of the new columns are derived from the names of the input variables and the names of the functions. \itemize{ \item if there is only one unnamed function (i.e. if \code{.funs} is an unnamed list of length one), the names of the input variables are used to name the new columns; \item for \verb{_at} functions, if there is only one unnamed variable (i.e., if \code{.vars} is of the form \code{vars(a_single_column)}) and \code{.funs} has length greater than one, the names of the functions are used to name the new columns; \item otherwise, the new names are created by concatenating the names of the input variables and the names of the functions, separated with an underscore \code{"_"}. } The \code{.funs} argument can be a named or unnamed list. If a function is unnamed and the name cannot be derived automatically, a name of the form "fn#" is used. Similarly, \code{\link[=vars]{vars()}} accepts named and unnamed arguments. If a variable in \code{.vars} is named, a new column by that name will be created. Name collisions in the new columns are disambiguated using a unique suffix. } \examples{ # The _at() variants directly support strings: starwars \%>\% summarise_at(c("height", "mass"), mean, na.rm = TRUE) # -> starwars \%>\% summarise(across(c("height", "mass"), ~ mean(.x, na.rm = TRUE))) # You can also supply selection helpers to _at() functions but you have # to quote them with vars(): starwars \%>\% summarise_at(vars(height:mass), mean, na.rm = TRUE) # -> starwars \%>\% summarise(across(height:mass, ~ mean(.x, na.rm = TRUE))) # The _if() variants apply a predicate function (a function that # returns TRUE or FALSE) to determine the relevant subset of # columns. 
Here we apply mean() to the numeric columns: starwars \%>\% summarise_if(is.numeric, mean, na.rm = TRUE) starwars \%>\% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) by_species <- iris \%>\% group_by(Species) # If you want to apply multiple transformations, pass a list of # functions. When there are multiple functions, they create new # variables instead of modifying the variables in place: by_species \%>\% summarise_all(list(min, max)) # -> by_species \%>\% summarise(across(everything(), list(min = min, max = max))) } \seealso{ \link[=scoped]{The other scoped verbs}, \code{\link[=vars]{vars()}} } \keyword{internal} dplyr/man/row_number.Rd0000644000176200001440000000465414366556340014652 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rank.R \name{row_number} \alias{row_number} \alias{min_rank} \alias{dense_rank} \title{Integer ranking functions} \usage{ row_number(x) min_rank(x) dense_rank(x) } \arguments{ \item{x}{A vector to rank By default, the smallest values will get the smallest ranks. Use \code{\link[=desc]{desc()}} to reverse the direction so the largest values get the smallest ranks. Missing values will be given rank \code{NA}. Use \code{coalesce(x, Inf)} or \code{coalesce(x, -Inf)} if you want to treat them as the largest or smallest values respectively. To rank by multiple columns at once, supply a data frame.} } \value{ An integer vector. } \description{ Three ranking functions inspired by SQL2003. They differ primarily in how they handle ties: \itemize{ \item \code{row_number()} gives every input a unique rank, so that \code{c(10, 20, 20, 30)} would get ranks \code{c(1, 2, 3, 4)}. It's equivalent to \code{rank(ties.method = "first")}. \item \code{min_rank()} gives every tie the same (smallest) value so that \code{c(10, 20, 20, 30)} gets ranks \code{c(1, 2, 2, 4)}. It's the way that ranks are usually computed in sports and is equivalent to \code{rank(ties.method = "min")}. 
\item \code{dense_rank()} works like \code{min_rank()}, but doesn't leave any gaps, so that \code{c(10, 20, 20, 30)} gets ranks \code{c(1, 2, 2, 3)}. } } \examples{ x <- c(5, 1, 3, 2, 2, NA) row_number(x) min_rank(x) dense_rank(x) # Ranking functions can be used in `filter()` to select top/bottom rows df <- data.frame( grp = c(1, 1, 1, 2, 2, 2, 3, 3, 3), x = c(3, 2, 1, 1, 2, 2, 1, 1, 1), y = c(1, 3, 2, 3, 2, 2, 4, 1, 2), id = 1:9 ) # Always gives exactly 1 row per group df \%>\% group_by(grp) \%>\% filter(row_number(x) == 1) # May give more than 1 row if ties df \%>\% group_by(grp) \%>\% filter(min_rank(x) == 1) # Rank by multiple columns (to break ties) by selecting them with `pick()` df \%>\% group_by(grp) \%>\% filter(min_rank(pick(x, y)) == 1) # See slice_min() and slice_max() for another way to tackle the same problem # You can use row_number() without an argument to refer to the "current" # row number. df \%>\% group_by(grp) \%>\% filter(row_number() == 1) # It's easiest to see what this does with mutate(): df \%>\% group_by(grp) \%>\% mutate(grp_id = row_number()) } \seealso{ Other ranking functions: \code{\link{ntile}()}, \code{\link{percent_rank}()} } \concept{ranking functions} dplyr/man/coalesce.Rd0000644000176200001440000000320114525503021014215 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/coalesce.R \name{coalesce} \alias{coalesce} \title{Find the first non-missing element} \usage{ coalesce(..., .ptype = NULL, .size = NULL) } \arguments{ \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> One or more vectors. These will be \link[vctrs:theory-faq-recycling]{recycled} against each other, and will be cast to their common type.} \item{.ptype}{An optional prototype declaring the desired output type. If supplied, this overrides the common type of the vectors in \code{...}.} \item{.size}{An optional size declaring the desired output size. 
If supplied, this overrides the common size of the vectors in \code{...}.} } \value{ A vector with the same type and size as the common type and common size of the vectors in \code{...}. } \description{ Given a set of vectors, \code{coalesce()} finds the first non-missing value at each position. It's inspired by the SQL \code{COALESCE} function which does the same thing for SQL \code{NULL}s. } \examples{ # Use a single value to replace all missing values x <- sample(c(1:5, NA, NA, NA)) coalesce(x, 0L) # The equivalent to a missing value in a list is `NULL` coalesce(list(1, 2, NULL), list(NA)) # Or generate a complete vector from partially missing pieces y <- c(1, 2, NA, NA, 5) z <- c(NA, NA, 3, 4, 5) coalesce(y, z) # Supply lists by splicing them into dots: vecs <- list( c(1, 2, NA, NA, 5), c(NA, NA, 3, 4, 5) ) coalesce(!!!vecs) } \seealso{ \code{\link[=na_if]{na_if()}} to replace specified values with an \code{NA}. \code{\link[tidyr:replace_na]{tidyr::replace_na()}} to replace \code{NA} with a value. } dplyr/man/order_by.Rd0000644000176200001440000000210113663216626014260 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/order-by.R \name{order_by} \alias{order_by} \title{A helper function for ordering window function output} \usage{ order_by(order_by, call) } \arguments{ \item{order_by}{a vector to order_by} \item{call}{a function call to a window function, where the first argument is the vector being operated on} } \description{ This function makes it possible to control the ordering of window functions in R that don't have a specific ordering parameter. When translated to SQL it will modify the order clause of the OVER function. } \details{ This function works by changing the \code{call} to instead call \code{\link[=with_order]{with_order()}} with the appropriate arguments. 
} \examples{ order_by(10:1, cumsum(1:10)) x <- 10:1 y <- 1:10 order_by(x, cumsum(y)) df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, running = cumsum(value)) arrange(wrong, year) right <- mutate(scrambled, running = order_by(year, cumsum(value))) arrange(right, year) } dplyr/man/progress_estimated.Rd0000644000176200001440000000312614266276767016402 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/progress.R \name{progress_estimated} \alias{progress_estimated} \title{Progress bar with estimated time.} \usage{ progress_estimated(n, min_time = 0) } \arguments{ \item{n}{Total number of items} \item{min_time}{Progress bar will wait until at least \code{min_time} seconds have elapsed before displaying any results.} } \value{ A ref class with methods \code{tick()}, \code{print()}, \code{pause()}, and \code{stop()}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This progress bar has been deprecated since providing progress bars is not the responsibility of dplyr. Instead, you might try the more powerful \href{https://github.com/r-lib/progress}{progress} package. This reference class represents a text progress bar displayed estimated time remaining. When finished, it displays the total duration. The automatic progress bar can be disabled by setting option \code{dplyr.show_progress} to \code{FALSE}. 
} \examples{ p <- progress_estimated(3) p$tick() p$tick() p$tick() p <- progress_estimated(3) for (i in 1:3) p$pause(0.1)$tick()$print() p <- progress_estimated(3) p$tick()$print()$ pause(1)$stop() # If min_time is set, progress bar not shown until that many # seconds have elapsed p <- progress_estimated(3, min_time = 3) for (i in 1:3) p$pause(0.1)$tick()$print() \dontrun{ p <- progress_estimated(10, min_time = 3) for (i in 1:10) p$pause(0.5)$tick()$print() } } \keyword{internal} dplyr/man/defunct.Rd0000644000176200001440000000272614366556340014121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/defunct.R \name{defunct} \alias{defunct} \alias{id} \alias{failwith} \alias{select_vars} \alias{rename_vars} \alias{select_var} \alias{current_vars} \alias{bench_tbls} \alias{compare_tbls} \alias{compare_tbls2} \alias{eval_tbls} \alias{eval_tbls2} \alias{location} \alias{changes} \title{Defunct functions} \usage{ # Deprecated in 0.5.0 ------------------------------------- id(.variables, drop = FALSE) # Deprecated in 0.7.0 ------------------------------------- failwith(default = NULL, f, quiet = FALSE) # Deprecated in 0.8.* ------------------------------------- select_vars(vars = chr(), ..., include = chr(), exclude = chr()) rename_vars(vars = chr(), ..., strict = TRUE) select_var(vars, var = -1) current_vars(...) # Deprecated in 1.0.0 ------------------------------------- bench_tbls(tbls, op, ..., times = 10) compare_tbls(tbls, op, ref = NULL, compare = equal_data_frame, ...) compare_tbls2(tbls_x, tbls_y, op, ref = NULL, compare = equal_data_frame, ...) eval_tbls(tbls, op) eval_tbls2(tbls_x, tbls_y, op) location(df) changes(x, y) } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#defunct}{\figure{lifecycle-defunct.svg}{options: alt='[Defunct]'}}}{\strong{[Defunct]}} These functions were deprecated for at least two years before being made defunct. 
If there's a known replacement, calling the function will tell you about it. } \keyword{internal} dplyr/man/se-deprecated.Rd0000644000176200001440000000743214406402754015167 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprec-lazyeval.R \name{se-deprecated} \alias{se-deprecated} \alias{add_count_} \alias{add_tally_} \alias{arrange_} \alias{count_} \alias{distinct_} \alias{do_} \alias{filter_} \alias{funs_} \alias{group_by_} \alias{group_indices_} \alias{mutate_} \alias{tally_} \alias{transmute_} \alias{rename_} \alias{rename_vars_} \alias{select_} \alias{select_vars_} \alias{slice_} \alias{summarise_} \alias{summarize_} \title{Deprecated SE versions of main verbs.} \usage{ add_count_(x, vars, wt = NULL, sort = FALSE) add_tally_(x, wt, sort = FALSE) arrange_(.data, ..., .dots = list()) count_(x, vars, wt = NULL, sort = FALSE, .drop = group_by_drop_default(x)) distinct_(.data, ..., .dots, .keep_all = FALSE) do_(.data, ..., .dots = list()) filter_(.data, ..., .dots = list()) funs_(dots, args = list(), env = base_env()) group_by_(.data, ..., .dots = list(), add = FALSE) group_indices_(.data, ..., .dots = list()) mutate_(.data, ..., .dots = list()) tally_(x, wt, sort = FALSE) transmute_(.data, ..., .dots = list()) rename_(.data, ..., .dots = list()) rename_vars_(vars, args) select_(.data, ..., .dots = list()) select_vars_(vars, args, include = chr(), exclude = chr()) slice_(.data, ..., .dots = list()) summarise_(.data, ..., .dots = list()) summarize_(.data, ..., .dots = list()) } \arguments{ \item{x}{A \code{\link[=tbl]{tbl()}}} \item{vars}{Various meanings depending on the verb.} \item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights. Can be \code{NULL} or a variable: \itemize{ \item If \code{NULL} (the default), counts the number of rows in each group. \item If a variable, computes \code{sum(wt)} for each group. 
}} \item{sort}{If \code{TRUE}, will show the largest groups at the top.} \item{.data}{A data frame.} \item{.drop}{Drop groups formed by factor levels that don't appear in the data? The default is \code{TRUE} except when \code{.data} has been previously grouped with \code{.drop = FALSE}. See \code{\link[=group_by_drop_default]{group_by_drop_default()}} for details.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} \item{dots, .dots, ...}{Pair/values of expressions coercible to lazy objects.} \item{args}{Various meanings depending on the verb.} \item{env}{The environment in which functions should be evaluated.} \item{add}{When \code{FALSE}, the default, \code{group_by()} will override existing groups. To add to the existing groups, use \code{.add = TRUE}. This argument was previously called \code{add}, but that prevented creating a new grouping variable called \code{add}, and conflicts with our naming conventions.} \item{include, exclude}{Character vector of column names to always include/exclude.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} dplyr used to offer twin versions of each verb suffixed with an underscore. These versions had standard evaluation (SE) semantics: rather than taking arguments by code, like NSE verbs, they took arguments by value. Their purpose was to make it possible to program with dplyr. However, dplyr now uses tidy evaluation semantics. NSE verbs still capture their arguments, but you can now unquote parts of these arguments. This offers full programmability with NSE verbs. Thus, the underscored versions are now superfluous. Unquoting triggers immediate evaluation of its operand and inlines the result within the captured expression. 
This result can be a value or an expression to be evaluated later with the rest of the argument. See \code{vignette("programming")} for more information. } \keyword{internal} dplyr/man/group_data.Rd0000644000176200001440000000355014366556340014612 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-data.R \name{group_data} \alias{group_data} \alias{group_keys} \alias{group_rows} \alias{group_indices} \alias{group_vars} \alias{groups} \alias{group_size} \alias{n_groups} \title{Grouping metadata} \usage{ group_data(.data) group_keys(.tbl, ...) group_rows(.data) group_indices(.data, ...) group_vars(x) groups(x) group_size(x) n_groups(x) } \arguments{ \item{.data, .tbl, x}{A data frame or extension (like a tibble or grouped tibble).} \item{...}{Use of \code{...} is now deprecated; please use \code{group_by()} first instead.} } \description{ This collection of functions accesses data about grouped data frames in various ways: \itemize{ \item \code{group_data()} returns a data frame that defines the grouping structure. The columns give the values of the grouping variables. The last column, always called \code{.rows}, is a list of integer vectors that gives the location of the rows in each group. \item \code{group_keys()} returns a data frame describing the groups. \item \code{group_rows()} returns a list of integer vectors giving the rows that each group contains. \item \code{group_indices()} returns an integer vector the same length as \code{.data} that gives the group that each row belongs to. \item \code{group_vars()} gives names of grouping variables as character vector. \item \code{groups()} gives the names of the grouping variables as a list of symbols. \item \code{group_size()} gives the size of each group. \item \code{n_groups()} gives the total number of groups. } See \link{context} for equivalent functions that return values for the \emph{current} group. 
} \examples{ df <- tibble(x = c(1,1,2,2)) group_vars(df) group_rows(df) group_data(df) group_indices(df) gf <- group_by(df, x) group_vars(gf) group_rows(gf) group_data(gf) group_indices(gf) } \keyword{internal} dplyr/man/slice.Rd0000644000176200001440000001713514472225345013564 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/slice.R \name{slice} \alias{slice} \alias{slice_head} \alias{slice_tail} \alias{slice_min} \alias{slice_max} \alias{slice_sample} \title{Subset rows using their positions} \usage{ slice(.data, ..., .by = NULL, .preserve = FALSE) slice_head(.data, ..., n, prop, by = NULL) slice_tail(.data, ..., n, prop, by = NULL) slice_min( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) slice_max( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) slice_sample(.data, ..., n, prop, by = NULL, weight_by = NULL, replace = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{For \code{slice()}: <\code{\link[rlang:args_data_masking]{data-masking}}> Integer row values. Provide either positive values to keep, or negative values to drop. The values provided must be either all positive or all negative. Indices beyond the number of rows in the input are silently ignored. For \verb{slice_*()}, these arguments are passed on to methods.} \item{.by, by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to group by for just this operation, functioning as an alternative to \code{\link[=group_by]{group_by()}}. For details and examples, see \link[=dplyr_by]{?dplyr_by}.} \item{.preserve}{Relevant when the \code{.data} input is grouped. 
If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is.} \item{n, prop}{Provide either \code{n}, the number of rows, or \code{prop}, the proportion of rows to select. If neither are supplied, \code{n = 1} will be used. If \code{n} is greater than the number of rows in the group (or \code{prop > 1}), the result will be silently truncated to the group size. \code{prop} will be rounded towards zero to generate an integer number of rows. A negative value of \code{n} or \code{prop} will be subtracted from the group size. For example, \code{n = -2} with a group of 5 rows will select 5 - 2 = 3 rows; \code{prop = -0.25} with 8 rows will select 8 * (1 - 0.25) = 6 rows.} \item{order_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variable or function of variables to order by. To order by multiple variables, wrap them in a data frame or tibble.} \item{with_ties}{Should ties be kept together? The default, \code{TRUE}, may return more rows than you request. Use \code{FALSE} to ignore ties, and return the first \code{n} rows.} \item{na_rm}{Should missing values in \code{order_by} be removed from the result? If \code{FALSE}, \code{NA} values are sorted to the end (like in \code{\link[=arrange]{arrange()}}), so they will only be included if there are insufficient non-missing values to reach \code{n}/\code{prop}.} \item{weight_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1.} \item{replace}{Should sampling be performed with (\code{TRUE}) or without (\code{FALSE}, the default) replacement.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Each row may appear 0, 1, or many times in the output. \item Columns are not modified. \item Groups are not modified. 
\item Data frame attributes are preserved. } } \description{ \code{slice()} lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows. It is accompanied by a number of helpers for common use cases: \itemize{ \item \code{slice_head()} and \code{slice_tail()} select the first or last rows. \item \code{slice_sample()} randomly selects rows. \item \code{slice_min()} and \code{slice_max()} select rows with the smallest or largest values of a variable. } If \code{.data} is a \link{grouped_df}, the operation will be performed on each group, so that (e.g.) \code{slice_head(df, n = 5)} will select the first five rows in each group. } \details{ Slice does not work with relational databases because they have no intrinsic notion of row order. If you want to perform the equivalent operation, use \code{\link[=filter]{filter()}} and \code{\link[=row_number]{row_number()}}. } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. \item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}. \item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}. \item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}. \item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}. \item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}. 
} } \examples{ # Similar to head(mtcars, 1): mtcars \%>\% slice(1L) # Similar to tail(mtcars, 1): mtcars \%>\% slice(n()) mtcars \%>\% slice(5:n()) # Rows can be dropped with negative indices: slice(mtcars, -(1:4)) # First and last rows based on existing order mtcars \%>\% slice_head(n = 5) mtcars \%>\% slice_tail(n = 5) # Rows with minimum and maximum values of a variable mtcars \%>\% slice_min(mpg, n = 5) mtcars \%>\% slice_max(mpg, n = 5) # slice_min() and slice_max() may return more rows than requested # in the presence of ties. mtcars \%>\% slice_min(cyl, n = 1) # Use with_ties = FALSE to return exactly n matches mtcars \%>\% slice_min(cyl, n = 1, with_ties = FALSE) # Or use additional variables to break the tie: mtcars \%>\% slice_min(tibble(cyl, mpg), n = 1) # slice_sample() allows you to randomly select with or without replacement mtcars \%>\% slice_sample(n = 5) mtcars \%>\% slice_sample(n = 5, replace = TRUE) # you can optionally weight by a variable - this code weights by the # physical weight of the cars, so heavy cars are more likely to get # selected mtcars \%>\% slice_sample(weight_by = wt, n = 5) # Group wise operation ---------------------------------------- df <- tibble( group = rep(c("a", "b", "c"), c(1, 2, 4)), x = runif(7) ) # All slice helpers operate per group, silently truncating to the group # size, so the following code works without error df \%>\% group_by(group) \%>\% slice_head(n = 2) # When specifying the proportion of rows to include non-integer sizes # are rounded down, so group a gets 0 rows df \%>\% group_by(group) \%>\% slice_head(prop = 0.5) # Filter equivalents -------------------------------------------- # slice() expressions can often be written to use `filter()` and # `row_number()`, which can also be translated to SQL. For many databases, # you'll need to supply an explicit variable to use to compute the row number. 
filter(mtcars, row_number() == 1L) filter(mtcars, row_number() == n()) filter(mtcars, between(row_number(), 5, n())) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{reframe}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/c_across.Rd0000644000176200001440000000217214366556340014260 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/across.R \name{c_across} \alias{c_across} \title{Combine values from multiple columns} \usage{ c_across(cols) } \arguments{ \item{cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to transform. You can't select grouping columns because they are already automatically handled by the verb (i.e. \code{\link[=summarise]{summarise()}} or \code{\link[=mutate]{mutate()}}).} } \description{ \code{c_across()} is designed to work with \code{\link[=rowwise]{rowwise()}} to make it easy to perform row-wise aggregations. It has two differences from \code{c()}: \itemize{ \item It uses tidy select semantics so you can easily select multiple variables. See \code{vignette("rowwise")} for more details. \item It uses \code{\link[vctrs:vec_c]{vctrs::vec_c()}} in order to give safer outputs. } } \examples{ df <- tibble(id = 1:4, w = runif(4), x = runif(4), y = runif(4), z = runif(4)) df \%>\% rowwise() \%>\% mutate( sum = sum(c_across(w:z)), sd = sd(c_across(w:z)) ) } \seealso{ \code{\link[=across]{across()}} for a function that returns a tibble. 
} dplyr/man/rows.Rd0000644000176200001440000001542014366556340013456 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rows.R \name{rows} \alias{rows} \alias{rows_insert} \alias{rows_append} \alias{rows_update} \alias{rows_patch} \alias{rows_upsert} \alias{rows_delete} \title{Manipulate individual rows} \usage{ rows_insert( x, y, by = NULL, ..., conflict = c("error", "ignore"), copy = FALSE, in_place = FALSE ) rows_append(x, y, ..., copy = FALSE, in_place = FALSE) rows_update( x, y, by = NULL, ..., unmatched = c("error", "ignore"), copy = FALSE, in_place = FALSE ) rows_patch( x, y, by = NULL, ..., unmatched = c("error", "ignore"), copy = FALSE, in_place = FALSE ) rows_upsert(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) rows_delete( x, y, by = NULL, ..., unmatched = c("error", "ignore"), copy = FALSE, in_place = FALSE ) } \arguments{ \item{x, y}{A pair of data frames or data frame extensions (e.g. a tibble). \code{y} must have the same columns of \code{x} or a subset.} \item{by}{An unnamed character vector giving the key columns. The key columns must exist in both \code{x} and \code{y}. Keys typically uniquely identify each row, but this is only enforced for the key values of \code{y} when \code{rows_update()}, \code{rows_patch()}, or \code{rows_upsert()} are used. By default, we use the first column in \code{y}, since the first column is a reasonable place to put an identifier variable.} \item{...}{Other parameters passed onto methods.} \item{conflict}{For \code{rows_insert()}, how should keys in \code{y} that conflict with keys in \code{x} be handled? A conflict arises if there is a key in \code{y} that already exists in \code{x}. One of: \itemize{ \item \code{"error"}, the default, will error if there are any keys in \code{y} that conflict with keys in \code{x}. \item \code{"ignore"} will ignore rows in \code{y} with keys that conflict with keys in \code{x}. 
}} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{in_place}{Should \code{x} be modified in place? This argument is only relevant for mutable backends (e.g. databases, data.tables). When \code{TRUE}, a modified version of \code{x} is returned invisibly; when \code{FALSE}, a new object representing the resulting changes is returned.} \item{unmatched}{For \code{rows_update()}, \code{rows_patch()}, and \code{rows_delete()}, how should keys in \code{y} that are unmatched by the keys in \code{x} be handled? One of: \itemize{ \item \code{"error"}, the default, will error if there are any keys in \code{y} that are unmatched by the keys in \code{x}. \item \code{"ignore"} will ignore rows in \code{y} with keys that are unmatched by the keys in \code{x}. }} } \value{ An object of the same type as \code{x}. The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item \code{rows_update()} and \code{rows_patch()} preserve the number of rows; \code{rows_insert()}, \code{rows_append()}, and \code{rows_upsert()} return all existing rows and potentially new rows; \code{rows_delete()} returns a subset of the rows. \item Columns are not added, removed, or relocated, though the data may be updated. \item Groups are taken from \code{x}. \item Data frame attributes are taken from \code{x}. } If \code{in_place = TRUE}, the result will be returned invisibly. } \description{ These functions provide a framework for modifying rows in a table using a second table of data. The two tables are matched \code{by} a set of key variables whose values typically uniquely identify each row. 
The functions are inspired by SQL's \code{INSERT}, \code{UPDATE}, and \code{DELETE}, and can optionally modify \code{in_place} for selected backends. \itemize{ \item \code{rows_insert()} adds new rows (like \code{INSERT}). By default, key values in \code{y} must not exist in \code{x}. \item \code{rows_append()} works like \code{rows_insert()} but ignores keys. \item \code{rows_update()} modifies existing rows (like \code{UPDATE}). Key values in \code{y} must be unique, and, by default, key values in \code{y} must exist in \code{x}. \item \code{rows_patch()} works like \code{rows_update()} but only overwrites \code{NA} values. \item \code{rows_upsert()} inserts or updates depending on whether or not the key value in \code{y} already exists in \code{x}. Key values in \code{y} must be unique. \item \code{rows_delete()} deletes rows (like \code{DELETE}). By default, key values in \code{y} must exist in \code{x}. } } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{rows_insert()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_insert")}. \item \code{rows_append()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_append")}. \item \code{rows_update()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_update")}. \item \code{rows_patch()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_patch")}. \item \code{rows_upsert()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_upsert")}. \item \code{rows_delete()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_delete")}. 
} } \examples{ data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) data # Insert rows_insert(data, tibble(a = 4, b = "z")) # By default, if a key in `y` matches a key in `x`, then it can't be inserted # and will throw an error. Alternatively, you can ignore rows in `y` # containing keys that conflict with keys in `x` with `conflict = "ignore"`, # or you can use `rows_append()` to ignore keys entirely. try(rows_insert(data, tibble(a = 3, b = "z"))) rows_insert(data, tibble(a = 3, b = "z"), conflict = "ignore") rows_append(data, tibble(a = 3, b = "z")) # Update rows_update(data, tibble(a = 2:3, b = "z")) rows_update(data, tibble(b = "z", a = 2:3), by = "a") # Variants: patch and upsert rows_patch(data, tibble(a = 2:3, b = "z")) rows_upsert(data, tibble(a = 2:4, b = "z")) # Delete and truncate rows_delete(data, tibble(a = 2:3)) rows_delete(data, tibble(a = 2:3, b = "b")) # By default, for update, patch, and delete it is an error if a key in `y` # doesn't exist in `x`. You can ignore rows in `y` that have unmatched keys # with `unmatched = "ignore"`. y <- tibble(a = 3:4, b = "z") try(rows_update(data, y, by = "a")) rows_update(data, y, by = "a", unmatched = "ignore") rows_patch(data, y, by = "a", unmatched = "ignore") rows_delete(data, y, by = "a", unmatched = "ignore") } dplyr/man/context.Rd0000644000176200001440000000352214366556340014150 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/context.R \name{context} \alias{context} \alias{n} \alias{cur_group} \alias{cur_group_id} \alias{cur_group_rows} \alias{cur_column} \title{Information about the "current" group or variable} \usage{ n() cur_group() cur_group_id() cur_group_rows() cur_column() } \description{ These functions return information about the "current" group or "current" variable, so only work inside specific contexts like \code{\link[=summarise]{summarise()}} and \code{\link[=mutate]{mutate()}}. 
\itemize{ \item \code{n()} gives the current group size. \item \code{cur_group()} gives the group keys, a tibble with one row and one column for each grouping variable. \item \code{cur_group_id()} gives a unique numeric identifier for the current group. \item \code{cur_group_rows()} gives the row indices for the current group. \item \code{cur_column()} gives the name of the current column (in \code{\link[=across]{across()}} only). } See \code{\link[=group_data]{group_data()}} for equivalent functions that return values for all groups. See \code{\link[=pick]{pick()}} for a way to select a subset of columns using tidyselect syntax while inside \code{summarise()} or \code{mutate()}. } \section{data.table}{ If you're familiar with data.table: \itemize{ \item \code{cur_group_id()} <-> \code{.GRP} \item \code{cur_group()} <-> \code{.BY} \item \code{cur_group_rows()} <-> \code{.I} } See \code{\link[=pick]{pick()}} for an equivalent to \code{.SD}. } \examples{ df <- tibble( g = sample(rep(letters[1:3], 1:3)), x = runif(6), y = runif(6) ) gf <- df \%>\% group_by(g) gf \%>\% summarise(n = n()) gf \%>\% mutate(id = cur_group_id()) gf \%>\% reframe(row = cur_group_rows()) gf \%>\% summarise(data = list(cur_group())) gf \%>\% mutate(across(everything(), ~ paste(cur_column(), round(.x, 2)))) } dplyr/man/rmd/0000755000176200001440000000000014406402754012747 5ustar liggesusersdplyr/man/rmd/select.Rmd0000644000176200001440000000303314366556340014677 0ustar liggesusers```{r, include=FALSE} # So the second library() call doesn't show messages library(tidyverse) ``` Here we show the usage for the basic selection operators. See the specific help pages to learn about helpers like [starts_with()]. The selection language can be used in functions like `dplyr::select()` or `tidyr::pivot_longer()`. 
Let's first attach the tidyverse: ```{r} library(tidyverse) # For better printing iris <- as_tibble(iris) ``` Select variables by name: ```{r} starwars %>% select(height) iris %>% pivot_longer(Sepal.Length) ``` Select multiple variables by separating them with commas. Note how the order of columns is determined by the order of inputs: ```{r} starwars %>% select(homeworld, height, mass) ``` Functions like `tidyr::pivot_longer()` don't take variables with dots. In this case use `c()` to select multiple variables: ```{r} iris %>% pivot_longer(c(Sepal.Length, Petal.Length)) ``` ## Operators: The `:` operator selects a range of consecutive variables: ```{r} starwars %>% select(name:mass) ``` The `!` operator negates a selection: ```{r} starwars %>% select(!(name:mass)) iris %>% select(!c(Sepal.Length, Petal.Length)) iris %>% select(!ends_with("Width")) ``` `&` and `|` take the intersection or the union of two selections: ```{r} iris %>% select(starts_with("Petal") & ends_with("Width")) iris %>% select(starts_with("Petal") | ends_with("Width")) ``` To take the difference between two selections, combine the `&` and `!` operators: ```{r} iris %>% select(starts_with("Petal") & !ends_with("Width")) ``` dplyr/man/rmd/by.Rmd0000644000176200001440000001467014406402754014035 0ustar liggesusers--- output: html_document editor_options: chunk_output_type: console --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` There are two ways to group in dplyr: - Persistent grouping with [group_by()] - Per-operation grouping with `.by`/`by` This help page is dedicated to explaining where and why you might want to use the latter. Depending on the dplyr verb, the per-operation grouping argument may be named `.by` or `by`. The *Supported verbs* section below outlines this on a case-by-case basis. The remainder of this page will refer to `.by` for simplicity. 
Grouping radically affects the computation of the dplyr verb you use it with, and one of the goals of `.by` is to allow you to place that grouping specification alongside the code that actually uses it. As an added benefit, with `.by` you no longer need to remember to [ungroup()] after [summarise()], and `summarise()` won't ever message you about how it's handling the groups! This idea comes from [data.table](https://CRAN.R-project.org/package=data.table), which allows you to specify `by` alongside modifications in `j`, like: `dt[, .(x = mean(x)), by = g]`. ### Supported verbs - [`mutate(.by = )`][mutate()] - [`summarise(.by = )`][summarise()] - [`reframe(.by = )`][reframe()] - [`filter(.by = )`][filter()] - [`slice(.by = )`][slice()] - [`slice_head(by = )`][slice_head()] and [`slice_tail(by = )`][slice_tail()] - [`slice_min(by = )`][slice_min()] and [`slice_max(by = )`][slice_max()] - [`slice_sample(by = )`][slice_sample()] Note that some dplyr verbs use `by` while others use `.by`. This is a purely technical difference. ### Differences between `.by` and `group_by()` | `.by` | `group_by()` | |---------------------------------------------------------|--------------------------------------------------------------------| | Grouping only affects a single verb | Grouping is persistent across multiple verbs | | Selects variables with [tidy-select][dplyr_tidy_select] | Computes expressions with [data-masking][rlang::args_data_masking] | | Summaries use existing order of group keys | Summaries sort group keys in ascending order | ### Using `.by` Let's take a look at the two grouping approaches using this `expenses` data set, which tracks costs accumulated across various `id`s and `region`s: ```{r} expenses <- tibble( id = c(1, 2, 1, 3, 1, 2, 3), region = c("A", "A", "A", "B", "B", "A", "A"), cost = c(25, 20, 19, 12, 9, 6, 6) ) expenses ``` Imagine that you wanted to compute the average cost per region. 
You'd probably write something like this: ```{r} expenses %>% group_by(region) %>% summarise(cost = mean(cost)) ``` Instead, you can now specify the grouping *inline* within the verb: ```{r} expenses %>% summarise(cost = mean(cost), .by = region) ``` `.by` applies to a single operation, meaning that since `expenses` was an ungrouped data frame, the result after applying `.by` will also always be an ungrouped data frame, regardless of the number of grouping columns. ```{r} expenses %>% summarise(cost = mean(cost), .by = c(id, region)) ``` Compare that with `group_by() %>% summarise()`, where `summarise()` generally peels off 1 layer of grouping by default, typically with a message that it is doing so: ```{r} expenses %>% group_by(id, region) %>% summarise(cost = mean(cost)) ``` Because `.by` grouping applies to a single operation, you don't need to worry about ungrouping, and it never needs to emit a message to remind you what it is doing with the groups. Note that with `.by` we specified multiple columns to group by using the [tidy-select][dplyr_tidy_select] syntax `c(id, region)`. If you have a character vector of column names you'd like to group by, you can do so with `.by = all_of(my_cols)`. It will group by the columns in the order they were provided. To prevent surprising results, you can't use `.by` on an existing grouped data frame: ```{r, error=TRUE} expenses %>% group_by(id) %>% summarise(cost = mean(cost), .by = c(id, region)) ``` So far we've focused on the usage of `.by` with `summarise()`, but `.by` works with a number of other dplyr verbs. 
For example, you could append the mean cost per region onto the original data frame as a new column rather than computing a summary: ```{r} expenses %>% mutate(cost_by_region = mean(cost), .by = region) ``` Or you could slice out the maximum cost per combination of id and region: ```{r} # Note that the argument is named `by` in `slice_max()` expenses %>% slice_max(cost, n = 1, by = c(id, region)) ``` ### Result ordering When used with `.by`, `summarise()`, `reframe()`, and `slice()` all maintain the ordering of the existing data. This is different from `group_by()`, which has always sorted the group keys in ascending order. ```{r} df <- tibble( month = c("jan", "jan", "feb", "feb", "mar"), temp = c(20, 25, 18, 20, 40) ) # Uses ordering by "first appearance" in the original data df %>% summarise(average_temp = mean(temp), .by = month) # Sorts in ascending order df %>% group_by(month) %>% summarise(average_temp = mean(temp)) ``` If you need sorted group keys, we recommend that you explicitly use [arrange()] either before or after the call to `summarise()`, `reframe()`, or `slice()`. This also gives you full access to all of `arrange()`'s features, such as `desc()` and the `.locale` argument. ### Verbs without `.by` support If a dplyr verb doesn't support `.by`, then that typically means that the verb isn't inherently affected by grouping. For example, [pull()] and [rename()] don't support `.by`, because specifying columns to group by would not affect their implementations. That said, there are a few exceptions to this where sometimes a dplyr verb doesn't support `.by`, but *does* have special support for grouped data frames created by [group_by()]. This is typically because the verbs are required to retain the grouping columns, for example: - [select()] always retains grouping columns, with a message if any aren't specified in the `select()` call. 
- [distinct()] and [count()] place unspecified grouping columns at the front of the data frame before computing their results. - [arrange()] has a `.by_group` argument to optionally order by grouping columns first. If `group_by()` didn't exist, then these verbs would not have special support for grouped data frames. dplyr/man/rmd/overview.Rmd0000644000176200001440000000300314366556340015263 0ustar liggesusers Tidyverse selections implement a dialect of R where operators make it easy to select variables: - `:` for selecting a range of consecutive variables. - `!` for taking the complement of a set of variables. - `&` and `|` for selecting the intersection or the union of two sets of variables. - `c()` for combining selections. In addition, you can use __selection helpers__. Some helpers select specific columns: * [`everything()`][tidyselect::everything]: Matches all variables. * [`last_col()`][tidyselect::last_col]: Select last variable, possibly with an offset. * [group_cols()]: Select all grouping columns. Other helpers select variables by matching patterns in their names: * [`starts_with()`][tidyselect::starts_with]: Starts with a prefix. * [`ends_with()`][tidyselect::ends_with()]: Ends with a suffix. * [`contains()`][tidyselect::contains()]: Contains a literal string. * [`matches()`][tidyselect::matches()]: Matches a regular expression. * [`num_range()`][tidyselect::num_range()]: Matches a numerical range like x01, x02, x03. Or from variables stored in a character vector: * [`all_of()`][tidyselect::all_of()]: Matches variable names in a character vector. All names must be present, otherwise an out-of-bounds error is thrown. * [`any_of()`][tidyselect::any_of()]: Same as `all_of()`, except that no error is thrown for names that don't exist. Or using a predicate function: * [`where()`][tidyselect::where()]: Applies a function to all variables and selects those for which the function returns `TRUE`. 
dplyr/man/distinct.Rd0000644000176200001440000000512614406402754014301 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distinct.R \name{distinct} \alias{distinct} \title{Keep distinct/unique rows} \usage{ distinct(.data, ..., .keep_all = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to use when determining uniqueness. If there are multiple rows for a given combination of inputs, only the first row will be preserved. If omitted, will use all variables in the data frame.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are a subset of the input but appear in the same order. \item Columns are not modified if \code{...} is empty or \code{.keep_all} is \code{TRUE}. Otherwise, \code{distinct()} first calls \code{mutate()} to create new columns. \item Groups are not modified. \item Data frame attributes are preserved. } } \description{ Keep only unique/distinct rows from a data frame. This is similar to \code{\link[=unique.data.frame]{unique.data.frame()}} but considerably faster. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}. 
} \examples{ df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) nrow(df) nrow(distinct(df)) nrow(distinct(df, x, y)) distinct(df, x) distinct(df, y) # You can choose to keep all other variables as well distinct(df, x, .keep_all = TRUE) distinct(df, y, .keep_all = TRUE) # You can also use distinct on computed variables distinct(df, diff = abs(x - y)) # Use `pick()` to select columns with tidy-select distinct(starwars, pick(contains("color"))) # Grouping ------------------------------------------------- df <- tibble( g = c(1, 1, 2, 2, 2), x = c(1, 1, 2, 1, 2), y = c(3, 2, 1, 3, 1) ) df <- df \%>\% group_by(g) # With grouped data frames, distinctness is computed within each group df \%>\% distinct(x) # When `...` are omitted, `distinct()` still computes distinctness using # all variables in the data frame df \%>\% distinct() } dplyr/man/bind_rows.Rd0000644000176200001440000000251314366556340014451 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bind-rows.R \name{bind_rows} \alias{bind_rows} \alias{bind} \title{Bind multiple data frames by row} \usage{ bind_rows(..., .id = NULL) } \arguments{ \item{...}{Data frames to combine. Each argument can either be a data frame, a list that could be a data frame, or a list of data frames. Columns are matched by name, and any missing columns will be filled with \code{NA}.} \item{.id}{The name of an optional identifier column. Provide a string to create an output column that identifies each input. The column will use names if available, otherwise it will use positions.} } \value{ A data frame the same type as the first element of \code{...}. } \description{ Bind any number of data frames by row, making a longer result. This is similar to \code{do.call(rbind, dfs)}, but the output will contain all columns that appear in any of the inputs. 
} \examples{ df1 <- tibble(x = 1:2, y = letters[1:2]) df2 <- tibble(x = 4:5, z = 1:2) # You can supply individual data frames as arguments: bind_rows(df1, df2) # Or a list of data frames: bind_rows(list(df1, df2)) # When you supply a column name with the `.id` argument, a new # column is created to link each row to its original data frame bind_rows(list(df1, df2), .id = "id") bind_rows(list(a = df1, b = df2), .id = "id") } dplyr/man/all_vars.Rd0000644000176200001440000000240614406402754014261 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/colwise.R \name{all_vars} \alias{all_vars} \alias{any_vars} \title{Apply predicate to all variables} \usage{ all_vars(expr) any_vars(expr) } \arguments{ \item{expr}{<\code{\link[rlang:args_data_masking]{data-masking}}> An expression that returns a logical vector, using \code{.} to refer to the "current" variable.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{all_vars()} and \code{any_vars()} were only needed for the scoped verbs, which have been superseded by the use of \code{\link[=across]{across()}} in an existing verb. See \code{vignette("colwise")} for details. These quoting functions signal to scoped filtering verbs (e.g. \code{\link[=filter_if]{filter_if()}} or \code{\link[=filter_all]{filter_all()}}) that a predicate expression should be applied to all relevant variables. The \code{all_vars()} variant takes the intersection of the predicate expressions with \code{&} while the \code{any_vars()} variant takes the union with \code{|}. } \seealso{ \code{\link[=vars]{vars()}} for other quoting functions that you can use with scoped verbs. 
} dplyr/man/reframe.Rd0000644000176200001440000001115014406402754014073 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reframe.R \name{reframe} \alias{reframe} \title{Transform each group to an arbitrary number of rows} \usage{ reframe(.data, ..., .by = NULL) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of functions. The name will be the name of the variable in the result. The value can be a vector of any length. Unnamed data frame values add multiple columns from a single expression.} \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} <\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to group by for just this operation, functioning as an alternative to \code{\link[=group_by]{group_by()}}. For details and examples, see \link[=dplyr_by]{?dplyr_by}.} } \value{ If \code{.data} is a tibble, a tibble. Otherwise, a data.frame. \itemize{ \item The rows originate from the underlying grouping keys. \item The columns are a combination of the grouping keys and the expressions that you provide. \item The output is always ungrouped. \item Data frame attributes are \strong{not} preserved, because \code{reframe()} fundamentally creates a new data frame. 
} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} While \code{\link[=summarise]{summarise()}} requires that each argument returns a single value, and \code{\link[=mutate]{mutate()}} requires that each argument returns the same number of rows as the input, \code{reframe()} is a more general workhorse with no requirements on the number of rows returned per group. \code{reframe()} creates a new data frame by applying functions to columns of an existing data frame. It is most similar to \code{summarise()}, with two big differences: \itemize{ \item \code{reframe()} can return an arbitrary number of rows per group, while \code{summarise()} reduces each group down to a single row. \item \code{reframe()} always returns an ungrouped data frame, while \code{summarise()} might return a grouped or rowwise data frame, depending on the scenario. } We expect that you'll use \code{summarise()} much more often than \code{reframe()}, but \code{reframe()} can be particularly helpful when you need to apply a complex function that doesn't return a single summary value. } \section{Connection to tibble}{ \code{reframe()} is theoretically connected to two functions in tibble, \code{\link[tibble:enframe]{tibble::enframe()}} and \code{\link[tibble:enframe]{tibble::deframe()}}: \itemize{ \item \code{enframe()}: vector -> data frame \item \code{deframe()}: data frame -> vector \item \code{reframe()}: data frame -> data frame } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("reframe")}. 
} \examples{ table <- c("a", "b", "d", "f") df <- tibble( g = c(1, 1, 1, 2, 2, 2, 2), x = c("e", "a", "b", "c", "f", "d", "a") ) # `reframe()` allows you to apply functions that return # an arbitrary number of rows df \%>\% reframe(x = intersect(x, table)) # Functions are applied per group, and each group can return a # different number of rows. df \%>\% reframe(x = intersect(x, table), .by = g) # The output is always ungrouped, even when using `group_by()` df \%>\% group_by(g) \%>\% reframe(x = intersect(x, table)) # You can add multiple columns at once using a single expression by returning # a data frame. quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs, na.rm = TRUE), quant = probs ) } x <- c(10, 15, 18, 12) quantile_df(x) starwars \%>\% reframe(quantile_df(height)) starwars \%>\% reframe(quantile_df(height), .by = homeworld) starwars \%>\% reframe( across(c(height, mass), quantile_df, .unpack = TRUE), .by = homeworld ) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{filter}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{select}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} dplyr/man/new_grouped_df.Rd0000644000176200001440000000315214366556340015452 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/grouped-df.R, R/rowwise.R \name{new_grouped_df} \alias{new_grouped_df} \alias{validate_grouped_df} \alias{new_rowwise_df} \alias{validate_rowwise_df} \title{Low-level construction and validation for the grouped_df and rowwise_df classes} \usage{ new_grouped_df(x, groups, ..., class = character()) validate_grouped_df(x, check_bounds = FALSE) new_rowwise_df(data, group_data = NULL, ..., class = character()) validate_rowwise_df(x) } \arguments{ \item{x}{A data frame} \item{groups}{The grouped structure, \code{groups} should be a data frame. 
Its last column should be called \code{.rows} and be a list of 1 based integer vectors that all are between 1 and the number of rows of \code{.data}.} \item{...}{additional attributes} \item{class}{additional class, will be prepended to canonical classes.} \item{check_bounds}{whether to check all indices for out of bounds problems in \code{grouped_df} objects} } \description{ \code{new_grouped_df()} and \code{new_rowwise_df()} are constructors designed to be high-performance so only check types, not values. This means it is the caller's responsibility to create valid values, and hence this is for expert use only. \code{validate_grouped_df()} and \code{validate_rowwise_df()} validate the attributes of a \code{grouped_df} or a \code{rowwise_df}. } \examples{ # 5 bootstrap samples tbl <- new_grouped_df( tibble(x = rnorm(10)), groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) ) # mean of each bootstrap sample summarise(tbl, x = mean(x)) } \keyword{internal} dplyr/man/band_members.Rd0000644000176200001440000000166513663216626015107 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data-bands.R \docType{data} \name{band_members} \alias{band_members} \alias{band_instruments} \alias{band_instruments2} \title{Band membership} \format{ Each is a tibble with two variables and three observations } \usage{ band_members band_instruments band_instruments2 } \description{ These data sets describe band members of the Beatles and Rolling Stones. They are toy data sets that can be displayed in their entirety on a slide (e.g. to demonstrate a join). } \details{ \code{band_instruments} and \code{band_instruments2} contain the same data but use different column names for the first column of the data set. \code{band_instruments} uses \code{name}, which matches the name of the key column of \code{band_members}; \code{band_instruments2} uses \code{artist}, which does not. 
} \examples{ band_members band_instruments band_instruments2 } \keyword{datasets} dplyr/man/common_by.Rd0000644000176200001440000000042214277513434014440 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/join-common-by.R \name{common_by} \alias{common_by} \title{Extract out common by variables} \usage{ common_by(by = NULL, x, y) } \description{ Extract out common by variables } \keyword{internal} dplyr/man/group_by_drop_default.Rd0000644000176200001440000000124014366556340017035 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/group-by.R \name{group_by_drop_default} \alias{group_by_drop_default} \title{Default value for .drop argument of group_by} \usage{ group_by_drop_default(.tbl) } \arguments{ \item{.tbl}{A data frame} } \value{ \code{TRUE} unless \code{.tbl} is a grouped data frame that was previously obtained by \code{group_by(.drop = FALSE)} } \description{ Default value for .drop argument of group_by } \examples{ group_by_drop_default(iris) iris \%>\% group_by(Species) \%>\% group_by_drop_default() iris \%>\% group_by(Species, .drop = FALSE) \%>\% group_by_drop_default() } \keyword{internal} dplyr/DESCRIPTION0000644000176200001440000000411414525714672013127 0ustar liggesusersType: Package Package: dplyr Title: A Grammar of Data Manipulation Version: 1.1.4 Authors@R: c( person("Hadley", "Wickham", , "hadley@posit.co", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4757-117X")), person("Romain", "François", role = "aut", comment = c(ORCID = "0000-0002-2444-4226")), person("Lionel", "Henry", role = "aut"), person("Kirill", "Müller", role = "aut", comment = c(ORCID = "0000-0002-1416-3412")), person("Davis", "Vaughan", , "davis@posit.co", role = "aut", comment = c(ORCID = "0000-0003-4777-038X")), person("Posit Software, PBC", role = c("cph", "fnd")) ) Description: A fast, consistent tool for working with data frame like objects, both in memory and out of memory. 
License: MIT + file LICENSE URL: https://dplyr.tidyverse.org, https://github.com/tidyverse/dplyr BugReports: https://github.com/tidyverse/dplyr/issues Depends: R (>= 3.5.0) Imports: cli (>= 3.4.0), generics, glue (>= 1.3.2), lifecycle (>= 1.0.3), magrittr (>= 1.5), methods, pillar (>= 1.9.0), R6, rlang (>= 1.1.0), tibble (>= 3.2.0), tidyselect (>= 1.2.0), utils, vctrs (>= 0.6.4) Suggests: bench, broom, callr, covr, DBI, dbplyr (>= 2.2.1), ggplot2, knitr, Lahman, lobstr, microbenchmark, nycflights13, purrr, rmarkdown, RMySQL, RPostgreSQL, RSQLite, stringi (>= 1.7.6), testthat (>= 3.1.5), tidyr (>= 1.3.0), withr VignetteBuilder: knitr Config/Needs/website: tidyverse, shiny, pkgdown, tidyverse/tidytemplate Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 NeedsCompilation: yes Packaged: 2023-11-16 21:48:56 UTC; hadleywickham Author: Hadley Wickham [aut, cre] (), Romain François [aut] (), Lionel Henry [aut], Kirill Müller [aut] (), Davis Vaughan [aut] (), Posit Software, PBC [cph, fnd] Maintainer: Hadley Wickham Repository: CRAN Date/Publication: 2023-11-17 16:50:02 UTC dplyr/build/0000755000176200001440000000000014525507110012503 5ustar liggesusersdplyr/build/vignette.rds0000644000176200001440000000066214525507110015046 0ustar liggesusersR0CȏHGDy 6ۤvĴ-)d|{OᣋrPm ՏPo-<:B%Ix[0c5 p'-p"Xl'ƱI~'Evԥ2+ mx1_|e?8ׂ=0" WRiQ+* 9HE KfX6;SZ\t/*B9mU`źsWYгm ܳ>N+^֥*\C@S-zkU V 9\n`1o97:55 N" yi؜…SMQ} ) W˄zsY_/[oxUHQSkէ?:dplyr/build/stage23.rdb0000644000176200001440000035474314525507072014473 0ustar liggesusers |ɚ'%HIL<Pn$HHv$GwnGfU%lVU=;3CҮмfd{w%Kvf-Kc-XYmYZYZKk=KZI_? 
,wT}a֯3z֓3ga'<-u/q]=+Kfa7+vYrͪv %Z2 =nz\zXc͇Tj,9swyqK?:wx̹dV^"1Q+ _Ԣuџk Oo;r&8CSGeKn^8ntF!m5=Kx )e%GɟQ&ZH}zRH ȐJLu$&8y0{PCJ ,U6nznOIwp\a(dދ&ۑXaﬞP 0 Գ*!ypGz?JzB}ݽYtJ\u !<FUNPNj~٫Tw͇n' bģX,'QXΙ|>bs5sƜ0Pf:S+>#I-!WX?A3I^RTbKn)%u*[ōއ|_cKYŤEEٸߩԌHֱt&?FfSԸ1+%+|{ri$N&9N67]i48feNPs뙖h  )ٲuus+f@y"[ HϛOl͍jzY&fKkik<6>+dCÁY뜹zfETf)U\6Kv`9e?/H1k_g;]I`q]%?Y؆3>;$݈p>}yo{9]Co]J3Z艊b g hNe*˾ǥnfB+.-/(K/4 <)U߼xUUtKIN%%7Y?j)mWO-ϱB'C 6ׁ!?Tfup^-m`.3_عjcjߦ%M/@Бzv?yKN@Z&IQ?|?k}lC%nWiOo@2M.K0ܜ`}\7{̡>aP1vkL %AP .S.ȻzT?fiSD0Ýovsϑ&*)Vo|Ńwĭ`ޜʎ-QceÌ]&8BZ^mC|K h{`1/)؋C $ψBB] `K (.YU l\-}x"e:b[⑤c7׊sg4tui"ڥNja"tf@ \/gK4C`Aάm- @n`rN֏Q9:kid1^KzlLCM x=mɶb\,lFf8*jd%Ͱrh%IBy@62mNH99whbsgUZG|Į {xaQnDi )rSY;6pa3̖f+y;l ƕ7y뎇Qڥ, L.,#eEH^C^ w@ץ6J_wcu0VZ}ZNUa' ?SXrÊHQ+Ҫ(BR Oy ,(d V9%8 7%# F5+u3kB$l<1> XJCdu/LGoCQMenUfΗ;Zc|F[$5D\bA¬Yr%~YRF88\5?!Mny,bCUWPޕM.Jo3zn᫰_e-ᷞ;MښRWr8YYs\)#SՃ!6zP]rJl~Š="ɭ5 iQF\pF; OBz?oɣM 2g(f$,K0Ymk׭rye>̒`{am|Q|5=r&U(IyɢMkh2h$\>Fc.BLi-x8D1UΫ/ UlϤ_x#Ep"Tnq (S5 ̲-Y;!>s\q6EN >vh^):AH&bhCE9z5K0KCmCɜ#ZWI|tOj#e‘"ȼ"DN%֪3wצɒ莦a[Xd[->85\x!4>`MlM}ts!]P)"s8yD[ }\.(Qe5B).۪:|U]"}~. +(k됯6MGR*Nq̮-@wkMCS7^F})%++uûߕN`#2"F7£*-̼2aFJ5_=Q|QX ?`GȝUdzL_l^x|DQYlŭ~.: N6+*sOe9}%z'm`/o)35 =1ʶؕg\cB~b{Aea_'akuBQ"עkV߳*0\sSZyA[0^ĭhPť_%lyw)4 nV9bb.aJ AȃʔqEw}O^#B.讪W=a"/8adu*ck9ڢxqY`Fo H;F=Qv1K蹾?6.{YsH@[l/n=>),lA:ꭹ9~KOȲ/fG%xNBT|ag(!uKpPidb}y-i%$bE=ʏ~Y5s(z!DʀKӦ(%PmЉ؞ 7LjFw@ޡ͚SðRrD`p7󟲿G=Ar(}7J=X'_49~m U.W0ǯ:jO= m=9:K++nY#WW֑mMClzvU40G 2̱lt,nL2̱lt,j`U[X4+0JMsdkc˦Y9V:jSm4FFrN[fX5/P*FVӐt,, kۤ*eVQ3=_eJ_:]Xb-&1ta.Jn[ *5b" !yy[~h=&L׀]oEux$kg{ܱzO߷hґds-HIy0"k!zҌmC6a;n-!PޑF;!ؓiYom6f26'R+~iw =s}yh)Հ8Vƽ5'umi{;R KVm_8txg*NVM mxޭqzr>ھqd nfULRl uwl|Ӫvg.wy}p{כ;㚭XXYA[FKݺWL16xvifvfZx!TVߪ޾ݴ#r&<{jd˫ݲ@-nXr-Kx1Om6?evvWoZbYZől ۻ֮&^a n9A eC[Vw~]՜P%yr){jgښ涤Uu N~ o [E+$5ٍšv> [|p@i0NՁ?y_z_/WALKnezzqHZ3#K2w6-4GrH~qͳs7hDL RM^7|7=>z 6c,><[u䰾'&?%|܄U>8?؎n_<{qi2gN+~طԦ&kv!A68›3w'3%`3[,dݬ as+Rv=2B+VXYg/==hHF#`ڕ!y"o=?2O>|Kc~7J{w#!!g/B.<>H31C1;zrM> l7>`R9|w)j$}wDXtAG'΄.Y̝Ojr>! 
[Oφ4r5NARפqcTOQ^ؘ4ߨ; /zTl(^FM"߂-mm[U{{@OYH8FoKp޽=n>*ȗZk j>DN2A?&FTԅK/#A#qk; *T]hRƳɮB^%xj7!KMbt N6Iy):=lX'\nd#ދQN6NZFl"6p6RpS/dKCI)l&smFMXwV߮ފ ^h09Qc*: d1J>!ImzW:;m7'?Ų ؛AC}ԯTH;FSLjSJMc5p#CpӍL!XHx$Pfzuoi;llsub*GM`WiC Bl(re﬈1|hmEÛXG\풇|m/ t8d\"h^M~c-5 [siWat,Q8;Qqq%xqeEu4by8Yi#0>9syiOpqZV1$Fgj>*yg5i)S NB*ϡ&1R)btݱ)p t} ujHIjgZF-{ Qd$6O!}G!j.T5w \i73̰Rrn\J/fŚKa5w<2t]m*LRuHRr@n%K"w!}ե!MKtԱ\,?ڡ7Kz8#W(IR +"#_aF(]Xj4Hv+F6m"cws̳]w]ZۄG|f-g"ij ">m2ެ|Dm<YjN7MȦHɝEe!)V9H]0% zx7kegxltxMe5c;s'U]!V_BRyΒ^,>c2X$,+,PwMLuK#FW;*JC*dl2Y7d!<ܶ=F._{} }mBо6A8R <rөwW[%L8*IY*X\vLUFJ5U`}N*Y ֗J;*yK>EHJ'o ֧7CxG^.W$CkYZ2>$uFOqGRg=Lf(dQf3=N0ݍ=|ؽ#doƔ!i*ZSJ[ IKjlk%m&tt +7Y %öhS:1vt7TcK : Y_K|f (de?x :h} S6rXdGЖۣ'CKVUwC(K!dRVy#=:pŪ'Tv #j@v#k pA =F !Ӷ&mME*%oܳerhB6QG2,ykV=/t")M@#FK nUY`|+E]ކ|[YQ}Ul$h@~L5Ju)N;۸dfQ}ɚ["jr.GU"FnjkmFՕ!C7Nau'!OfoGx4~}vJXݬ-auU:lPϲFe @/U-KP({a3[ ofk] Yn!oj#L d6=_uFo҈ N+Ϻt>#-Ayp:_oƪDe!C6h9o1_r8y]%w<[~EH?2g,+N ݦzsyvjHp >l2YOrwY?<@s}WSw]xo* VĪ9N9#A#9>|G=mƪ[9ߝWCޞV(V3!KObv|ҍfJ~'m RjIj1R;ZwղQM-=5yK˕f{3Wؑj ΛHHɯNXW(ɃYY[v ZVU:}ƚ h\R@x.+H?;VgNBd!_ٕRJ灳F#rf.s4%s*G) o"p'L()+-!(Aȃ78@rr2Btpsd&Y[ d)OKz(xjg6HKdm:De?Cd0:eG[2j9<6#!,tr8Y}g8>^DNQUd9Ѳ[V䇯Б)AF;"m3]Ng솒0w NQMHhwou:Eq6LsfȄ! 2N] iѤ2mMd,S L>u;?%; Y}NK* dMDйQh1Eu/)9i2g܉ o:AU4lKZ)Zm.[XGo&C3]lc-toCllL*aiD[O5'"15BEdoc42dJ͓n==%'t` b*ZQEx'e1䏕32dɭKY\<؞l-bd;M#~?aP3)**lzU QQ֊ó8:М Vj@DmͣAIHcpV&iY;T+#͑sv H!IB &N!ׂqmFc঩oz7)I+  < dJ.TvAS\aV]t z撈q3K%m)} ֺ!Ex\OڹvȻ]?n,UP&u3ϔmC6NhI"8y6DmBںER.,?yĨ۾ĩmuSQ=c\nwQef^Py _bE,R93mqKl%?4y`R˛(7E82:@zK2cd_UvC!=ho[TÞl/bU%neC@ne\AN43Ap63A=ne$|+CQ>ڊ XKz,Gí y-52cխ |[(][$ԩ[1`­ ls+$lpڕ[@0V3H:F=MN2wo(CV.l#. 
P*7!ߔAoi!Qz^بU áDځJ'ub#܍VٙQsťo^Zx2 eH6uzmԼEiwH6,4#=l {Y\\sG0r.Nsq a7"EE^dؕa}TuJ1c}/]6ᖣQQa62aA2*eՋJEK?/,HT0."Y;fkHB*N$7ci'&jjQᮇPm4:s('t0 iSuhp҈8id5Г%OsǫC֘lWQϞR![J  l v.nnh~fz7a!Q܄!1UzRjOAY)R&,(U):؛1$iC[FW[Qj^+744',`Vv-HI|ˬurňGVHܲ!cח#ݐiU:lfö+č9qFfCԸoѲD4$|RMH) ASmw!u*jBFmEC嫐f)u*$e]Ub7V*$|S{9]C/uьZ艊rmB g hNk땭-/(/s0 <)U߼xUev2Ex TRm \ѪO?2u6_%cTi3Y4z~2%vNLǙβ] rnJƛMZӦZ^aئ8q#x ˩[q.Iݱij]g$IC4&k Y7Cz;(C|_>%8N?a[h@ ؙ1H Ky <zG3TV5z2BP֜lXSQ'qluxMeE֠ooCcWX-d+R1CNd,k dRMX|B61Y)i)x/xEAVw5ЄX66-I̧%QڙpGS=#T|x$Khf,轄!60dFG_WM\!)XWQ]G/[0[9g4daVa5 Z.JGL9l;!H 'K9ӯ|Q3hZmykX$aa_7!RE=!H߸_▜vz#]Y84d 6 m? ծԂZKtɔ55tx| w2LV4 Kͨsȟ+D|R _(Av|;'j""2Lh"-O㢆BvvP(*+h P&jRBrW)y;)v$yX:~NgVq.A~#Ǎ'5:r;U~p#s"ms±VsS涕Q^k]I5]Z{8ģү/Cw)GߓWSꆌA( ?4`mEwA S7a= o@a!H'N(IΨX٤``jG56MYE_6QRhްodPr ~6UtS=$f%cȏۂ"WYt{8Tk5n>c=ۂ S{-hoV*㯄g9 KdGUA{kA{)]':X3a-A{%v'h8lpr^,7&;?>h/AKwLAJ|Ɨҿ<+Tj {W46jU(p(8)јDbT%|]NN|mA{%e`^Q1aRgP0gFMB,ؙyfj۫sLς!7Wd:b+k3&2RnUk%N<?:NK=cQ {x*$&!TsOۙ79sE/{pZwxZDjaAަ֊8->ʵ>ZZVڅQn K۴8_!,|mJCo|VDWt rWJΝhgVIa.jD8d.Z5LSu%Efij*co >cMf &Yڗ5jl]Ke_"Nie"@8Vs =彶(V>DѭSFȸo&aB`t·-էh ͈L'nz,ҎQOY*Lsq&̳c{DȝP9zGZ퍆űs1P8#ވ| F6GƯÑz\EaelQ<8r2o! wFϨ e%:L~P ZkU{R87BS%0fp`1'H;F=ꛢ/JC| pP8!rFC#7tnd])$L1 OS=+UoSdې:uRWF_3>h Ƨot|!FcԯȵQ'>A8gҤW*>x$hHߧ1w@!,} DY酒!@/0qwB*T-N}b(Ѥ>߮8k^(>.62 ~;k!MZ;oOQ&(( Y+CUmj Umzތ @ j6 W.6P&)aswfTV9Z̕y} FI `f\!I+OC1Ԓ,|{v'hsfѭėKRge*>~2i4%ޭپTH_.!RS#N3DE=md~$4rhB%]I$x8p FFY}3ˎD4/|*<|29kq."+')yCt5!UaBM`^Ep6K{LQrw!?AqGVM,9sIdU0Gtz[ ;xmYFcYOK<tƀ.(LC>.x\O\Q\tᴌRf RRI/%QG"?,U1ڐ{J~C<499818Y>C{":d33f!&6wjx[%wxYuo*9`dtxUmjZ%;dk*f)~1Yj:?Y%+ 2*y|Y6*~ Se?~YSkG"m6Pψ"Y'lIG$;H+[)zx %-.dxmR)9r<ږi[J;/ lQ7lƌvXXPqx;.V,]-bw x1mFWK*yr\-j{}:GhlOj-Ark\-bԩ)q K SR-̌a!jv9˚:9<D6:9 < Y=l5,ΞVM"ì󐥦OU^|AY%EL5lK3_+27g}| 6͉OSOv""ϐ;3܍(yA]IW]J0 p7dsj9>m,bt9&ն\+ 3R#hx-wD" Y_T #>isujmH{WᯛSߏ}&$|G h$ !@tqB3 VM-j3~Y9~Bjn`%gzn9Tbf항Xk)e Kq+tdn8ރ|O՞Ц鲖Z vk@BGZou>͏N3nrjzzEǩm²v g7dn\$qH o M۶B~/;D,pԌXQ0j*7Bރ,א&&!?F+@V߲,uAS(X/>6OW Bg+fddj p:+kAF'[S';0{23Єl&R*9 Kzf.>uDtRz1ǥlr)C6",r.w"8i. 
K/܆,aQSEDq(}NJSB7R㐥Ɗj2%C^~NxH1[[YO:9<,Sd:%56D+C 3_&:0l=f&`l9pE8=Q q7r,?:#wH~7rǝqQ5]3&p+ _3%G|?<$2OEx:MI-jf fe G!KM=TkQ0mdkQ6dgÄ)7aȎ6U[Lp-)S%V5%1s;t)GO xzd 8ކz3Y'63Xz]:٪0!&&d"r8Y54s8ykbPr1$^ꚆB||N$2 Ц1eӐ+Eh,ϭMzeW3%e1!k<#Q3:C;s2JF bB=T/J3< w`q.y|Ҟmb9<#19}thycC5ax?BIrΟ"w1JGcQ%F=)Tj^-Fy‘?ϛV1V/%H@ɢwG!K-¸3_/9a\*f_YϮyS&ǀ!Z5Ǯjܽc!2nVb$>wA4/߀ArZr۩hV92a uM^xQJ(T*-/spTR8K`Nx 5\ XmdWg/ +ctCุU8Nӣʋ[2ktmSd 5-գ9MJ'&'t,"WZMWӡ{8:4:e#:Q~t0 |iSS!<(]D=)~ ?DfUBVxG|tH!w~tX)>*+?<YRyn+uK}.҅r q0Jvћ]v\-vlUm?ij0ݐunO1 pdR"{J;ŬRj| #$u¶ʾaPmt{8TkFf3"p[-ڵ %u!C{1+Uo<.%#ۆ9bA݆K} ~d/\~۞[5YɠR|sf͙˓ u\:Թ KgΝ&K8ۛXXcҭ} ]w=>uܼ nhoViv %+u~ k_bnVӹ u{>]UnW+qo&f`~LOL'УO}^ bKc?/oO/RDC[_Uo245^4&8kr-qQ٦fZ8Ud\Ǖ %C<4MQq (U/0!dsJqh,%u^# xj7 KMb[eJ5l0vR %?!M*1?v([f菅NcnOB>QY5_gJY8x$գ *-8dX"o|> <?Bof?-\ |Vi&֡FYH l VU:}ї4Y'IZ?p -2m.FwY9NͿt h苍|dNPs,8h c~..zsޜ tKnI5>c%_W$lPK]?zbDó3wVuq9NBo ƲIYFD"Yi=wռm:bh&Zucȏݎ#*ew)Zu N.൶WR'YÄQ~ǚhꘅ^jmC UQ7R/NLnHNXrV L,W$tS16a+% }ohSㆧݖ=}M"K~7oN@#6zyyej>d]8 \ѕDx<1f'pE~ Se7 F_ U=R|8nqZ7^|Aو։QpGRRvq&!S)ɚ0J 5v ң68yHz>eV ~;GOq9C%źIJpA.dA Mo@LlQc9Ed,W5Y3JnscQH^fdB4xŠ.õ o@V7ǡy{v*SXVZ iG{tY׎oɡ:FNGǐw3%)єoBC=RP=l69lDs/1&##&t8p#smD4gҺP5Xwc[8A:h3|K^+Џrx p6 5ކ>¡ٴge2|hsvG9C6p=) <%7 6JF-k9sڔ#-A~^?v\u!O!Kr˱H .'[VVxNcQ[W`!f*k26Q2Ǎ5(M;'rjCAU:'/i RT|6;mp'55qy\i':[; |NGqIR/C}o7-]RI 3MF+5,obu.@>L{9DG+Y} Q8[WA֧p Tw= !pt)266-t#ejc\:Au-[q>G=|0\\dŰI&`VԠ$p %7,as!^G13 -Ay .-JG|4!ֳBrx޼뾶m/g J L3ƕxB|SF6d.-9y8NE $E1U;$~#ޜ& ;%stó-˚.6]VRBy\ T:O!yp'6rROpPm8M} 2DƐ~i4vZM U,ͨS6c'K~YT5L/B:E+n %ȗaiE(LB]9Y.3mؙ35Zvrӵ0&u޽VPrݐi6 Y*3q' c8 <Yg?1,Ok01j &vICo̍5a!drpx \1l|oxX}#ȏ:b6̄%A'Kw7_~YDNC~Ȉsd]̙aݹmwUKVkFYN>R<nՅfER.7כUь́&ƊcKfYX= ΟC8X8[;~Pܵ֒˖onH={Ѣ393=J7JGaD3p JoΩ.ea -,ol{`I̺*~cLc.sF4z% Y*ߚwRrb߭{Ef\Mcrx$dLrz BS; )CV,o]{myyqԯJY*sf{sƤxV.my,^ ¾&%yEGzo*} itLN=tQj[{!KTP+>r;O;J~C~YV1]=~7u5h|,@.(GE1 JDT("3T'ވxsC.<{8ē5ob\,%=dmIۢZV_ w1JS%)H9 We$X~ Y}5t_`nD 8s7 ybGBJnX߄ASmZr¢./`H?ħfqB}UϨ0@V*>% +%y3/im.uZ$5ZKshb`H6Xdw@ޡLLfTKͼzcn]e]*}W`iF+*™xoZQy\ B}+ߙ  -zwviV– n|Mժ\pk?p=5%r BKvK~Y!|r.^0c#d:d;KExAe;͈ũyρ _9^XZo"%x)4^'n 
*5bHar^%C.ԝrS.X/9wޚdWOVd˵F:HzrlGyYbzEzv:9.9}dp~$S⚙R?~qUcnuk}ػݮC'LX+ޟUOG<؏Ah(aK+Sm5U~x{W47 ĺ\tl[YZ7j>de;Ae+n=KxieE1Z h$#/UJFBa5gV%8y({{1NA ,U8nzn+i1[_ȖR(Ѥ^ "_2넻 e#ދQoE9nϴ^c9W[HlzC>QUڴ$<D$x$U]餎{(˽W*gT2g̹On?<~~Qz9Nbf{Ss*>cH^64coXOI}uZL}H+k8k%!7Jgk#I28? (mklwj6Jfl'@)T]eb޴p0ijb0 pݝ-[YYG\Ɍ#`6N2-X6TUt e]pG@Lm[($QȏfDGa4 ';5%D9ē5#(;3ȶb R3ȭ!T ~-@_>,Xr؄qVOUٖG3kDP]++ D*5 '*H(2bUS3AƷ%*G |;NMe% :EQyrDuM?mkM^|1䁗 _RBE*bn4 |k27TDfW'p❵RWiЄ,U |TYK6ߍp&*'IM{7>eE>Q_ͣ[n,Hd<{Xy`r1S 4Tt@b ۝wMyhJO4t׽EMt{'gM >-ն%zWA,#ٴfBt@> Kw7BX)-+%ɝnƸ٤ βj?(єA̶bۥT|x$ۆ~U:9lb0J(+ڌx.Jpj}ߐ,jܢiԑE8 9N;{rN!ӉĢ1|6|NY'{ Z{R VL$'RAVξXns4]^0IM,ئgujG2d}7=_$@Z-]rr(՜^U:?!R:A4Lvg}SeWa[%]@ֽhnʀ0+qt Wvjs{W8jWʗm/M'ۍ _'OGW3#tDGBdG < 9oFһ1NC/HxT R ;on5,"tbQb[l95,cjDp_k B0eDSSxL$ɵ;.).Fϥ G!K ![Oyx}SŰ-,Zz.;%1D>toB7*QK0Ζ\v ==~ ASmZr¢[e`$3(*%:%4xy' vm$6g ݂i-[N XYvRzjp t4Z4aeztTG,ei"J fjJ`)\lC<]tp=)^^Ro['e8lvIǶ5Ԩk#S~v^jCuW~hK| wxtRr=ƪ;<2^`]f)OPu)πCjBS̪$02F!)hFAݡ#J=+$z)Ŕ )} eP҅F脯5WW GK<3cu@.d3nBix$|NHܥ~w{Nm,á>۫ZpRZؔ*݈}7騱uC'(~0\8wADg9B>YHA~fP47aw!\kҖEq{Bgȋ!sW{Bgd#@aor;;]cgah IrGGO{(0,FO\ck).YU 8s٦siZDg3͛f-Zex^l6R~x-Ac9a>+ak)4=޹iԶB*iZ AB*J~C\1G+a-`M\/+&(75~hUE3 7ѵ'/݂U6]vD |ͤYO bv8i4vzB < dU*:<6I#*W!_F' _SI#oX1q\nuS34f }2l7mp?4/v52AЧ0h}>NGmy,ش5hql~O#bh~>C1.6Ǭ㢕6!K9bsF-vֱUʛTPuW}1>0}|. ?2]VwxL1jK߀Q,#sD4d)Ӂ9zȅ8Cu*%xe=k::]D#ȏ˖~T/ CL.%!Me֞dtF9J FUkJ'eňGXF +/+eʄY : RN]He-aQSy@1tPɵ} _\\r_G'ri캯|s G5 Z\}|wBn;xpu~gh>jaO{#fS^R[-ccyFiٚV| x4A 駀S;Ȭ}j]Kl"w n,9~-^-&y'HXeUIsZiUĕ^E8)e.qmnmaIi Bg|h @?PLTf 4V(3G tbאvz*QLU 5q^\Ad0 @0UGG8Q/Ew(gj3?.GiǨ7ҼyҌn֕ЖPo3z 5WRހ0aevC1 +`B8y 5;H;F=5at|FAW4U*v`5uW*)]v3`3Z*ϙer{GxWr%9.(HtnM^0dIT_B}Q(2+ hLB=[\&L@PgKs&-8q͘yZjTK`EݛyEerqJ'ІͱTAʯ쮹12![Ou.`4vq'L޼K[3_< Ⳡ  n US~,~UkMRS:_B}&lXZ3I ~Mf*vz*rn4ڢ5+nhr7(Re|e;BRo4E4M p2瓣 nϠ S! 
g7jo~l7#flqI?rfvj?6jO?cn-_V[´\\i!)'"薐+@H|B}&,3g46͕R1ϮF9Rϙ6-w_岽l}xī?\żHi$NGf>πa?~e:Go%Zk5f<̟?Ҋ7i(8'&aBnU&IN1HW+-TT٦ƦFiS?U͕7f'AjWŪ -#re w_ܧJ_ %{JS` Ð/<Yu"kA~?I̕ )c W*a?6T3$l<&n?Y,[w-_+;O`("f?M&kظ a?]2,k(bO %!ȇ:|SS$aOa?5|FOa?5t6¾OQ;b< [KScS6L)ӿmtuQjXZX=<;F4`(_nhג^BxzzaQ8#Wy-D ۤ7x`GA݆K} ~;?C\yβؓA68›3w'NtsΜ;oM3:5㒭^FɆi/'6-L|:U-M*`Voh HH}Essrm棲mvdm7:U$l+az$_b8#A#[_$y ojUa^VzwqvPB fNRVٱf7@Y)!M@TFz -(d#@B(hRkƸbn6A{ @7qHuC>hMS U y{j'pJ-NhRZ+f9Ďkh3Bx5D& @RaǵMn,U_մq8ģI+Z6P 憘| q{TfR%ö"s ep7nvwriElVpgf 8wsY2f0oowpihE B)Y7P aJzy%5iR A!KMT&prAԐWM5^HtR(XK|o;ײiho7s ?eE 7ˀIU3gsf<IrfE8!L;=?[h- ΛA /ȯHĔZ{M3ϋ+J4W]m b")3HrOzT QʉeJFVpјvCD$bKn& nS_ ~,ޣx.[پ7=a |zT~)GP_:ңBn&> K-CAGx*784!]Pϣ ݡ^@rQP?CWx?;T.{С^g= &CÁep#|W@xπ{7 BWp!qnsבvz[Y OmG8 =r8u泥0Rw{3 "]yCl?B1vh;SppyL׼?@>ҍ70^󾏂 v:rQQ{ԣ>{ڣ>)3~ &Cáp#|φOAz?!n)}?'?9ҎQӚwȈ H~—HK\A o著-"쾟 ߛK!Bl H;yfhDmi͛f]MMk%a;TZu AuhHvCux&CÁ+p#|e!:1aWǶU$N1%&yݷt!-POOv=gaU:5#ZfqÛ"F<Cv715k5#,ʍ좳ﯝ`ɔ.u)` %~ cuy֯8oi ʗ!_C0R^&ջe>Gqb` D[L\nWv9Vtm8v X.G ̾|c3:Xo$rQ5[h(:b#*:jEmBVχrfD͂s2,5aͧe^ %9< F -^!%dt,ef{;\ [=̚Y)U.Vqb9| 2V0ln:Vs}_ԶV"`pÜm{#Zx:])Qȣzn1d}K$x8p FFY}3ˎen'D 'D%+0WܺY*d_ӕz^]6z慙aysUXa/P%Dp6K>,5 f|Cw)GQ~S+;E'({߅Qv攟A1O>Ő"MK‚Հk vڶ{ Hv_.x9:Eɍ'!K#],Q|],!,4rxel&w]h oA^DK`rQ:0ET?~sYʫc` r) !>xil?ΛsgoJl ld Ղ̧,Fu2gfBZMJm( 3J,T4sȨ*Դ1KF5w F5׀w!UV#P=}R*Y9VQ3ϲQ^~NxHvK%9d :2hyͱKS \RhJ9s&?|Ŵ?iZ =!N %7T*ْsy|Jr}}8 S@9PD-ށ,uwP`57_Ә~66t2UA N :Gay!_(OrBV?($C6VbޖS l?̀rͥɟ"I6?O"a$%HʱΉV˦#ӶK&La CUW=yqC<6>J{H'jXD#"YŸdZBx.ZcC{S CW֖7oZՓA#kڅF U PԦ)=潗ebՄ]p/d}73N~ ғb_ܣ 0J~=x?@ ؙ?}A~шH"P_n-zk_IxN+'̊6D8dR`J0d9t3.@576q@F-B0L/B[(?2r W#%r;4I;T۾ʈF1fOGP,:*O!MUfH3De Jveo[<ۢ7xx.,0.<]rmbrxRq ϩs.;D𛐿;T/QC[LM)p򀲚zR!CJIL/T#?cSffe G!KM%~iRn KkT"8LB>SIB)S<e ׀ KX͗ KMފ9C[T:;lyn_4Ch~ YAvX0>+tB1@Zt+t(1̦Xg!K+]/o6.vDz&s[l#BMw K ;J!G<# +D$QZ ob]&{9i#AVgZH(a~azǘ $ѿІzg6Xh=!.퐥֢ % yF&ۖ=;{Uߤ %;AtVB;x0]Ԕ =C.(_44qd}{pݣO>/a#e3yOv߿$ m7~eaG5~G͝[;"xZƎx\O: Y5gUf}_/9%M?ME&Wc*݅ O%K,NAf`# M8yf/^|UYR_5M7eHْTGf[GC<4.; ,WIjGat60˶Ef~E%0 YnK]$IEwP $`;(^N YBDq7d9@+D fPr@}>IV ߸.(,V|&5oC{\}#f9?Z)mfUMV4Sf 
,vх-ކLZH%!iQ1f.Yjm0eB%L6u_oCE\]U\S\6K¬OEdWvi˧ H̷C%JuY<[bB_Z@ˢFn]/{)gg/7+jF BJX٫Z5#rhcعQ,d^Zt_Єljk'It KSǁ $e&7A.& B>^ LFㆮ7`!ˇ׿ Z(WZ*s]4Z1!N-%w7t_hB& KEuidːOo }atChpbs1dz_1 RGŕt%/!9L>ʾݙz'좳k*2AHdi' 7@xlOM Yne%9( gk!>da=D]TIl!K5SE47e.=jMsإ-J άo/95Aˈgx̂1lc6~Svn1xmܵ]hIt_7P1fts##wsTD/]B#ݐw+kBj֨.ӁxL7a~(~Y]쿍Œ1.F"rKCK6; opc5`zC_Y3MCyFAM!⨓x\OjTjTܧgW!_ͮ#2|Oڑ#J<9̨vvxvne3jG( n# PILu ^>l ;d|xjH䜰 cqqCΠ!z!7qx _jhcw;A}1# |C>8 29 `7JF3/FWJÌ9'n$^I%N J>8 =p@c#r:b+WkQr@.p@{cF%>: u}I%9.P3l8R5P=f'9a : hp?d&MRr(fmf{ WYW5:̞3 uvBs-x>LAh*,xNȺ%G(J]F MrqO7o?B;-濉FW[LJCS%93e jDCမX' !6(* _ sc'1btP --}ȟ(ao o t@|wB l#% >Znh}8cyOm%C?zŮ~B5dmt{8TUy1Q,óU-|Ep]8llP"< L|fxO[0Jʪ:m9b^c M-znՕz%ۓ8MDO?cᜣP%Pb?%,wC4F<*JnhB6t6پ-%zqS(0z@J,=(1ʼە\R}(7pl/Lp?dyL2j;{Ϸ(B}D' Em Mt8#tYRUuŘNxﵡ, qі( 3Mh d/!als`r!ɰ ݆Ņz,`"% .*"q,7)'B5'yCZz6  =x!,4u\*/ _D:4.R=ɪosB}=ݸ0ZVǢ>XYo8CWM<.pWMMyՇz&V'^+-5l!)d?[ֈ3 ߢRj "9 j( s'_v͜1>~v[F7!F7#(fo.I)d:p' } r-z%;]. {/Dd REeKMK{ ~L8<`jQt~A;ؒLgq]lQۦ8ēFN@Zm4^jpgsih__Xp*<1 GwO Sl)I/| | 2]kh b lH wA[۵C`*xܬ\X/{'Ӑ/ҋBA7 NHށ|"Cx51~p<5\S"ć x: 5ee Us=ewY=M_NLue\O q|.eA5ZnXG`GT:]Z]etBntGhBwf0q6; dͳ splP'?Y(dx>%O Eٽ:V O;EoFe.E9:tve>JKfK!Ey1`ɳٹh[E@8a^9Kp ALn #$Պ{"G D\DkHϡφ(mSF?3m9[1U9(wG>)*%'596ֆU'!n{#M(8*KP_p,"Ym5{ðki$ry[;R}ح@P bU՜0q88ɒ[[ JcV =X!,S^`U8 0?ۦ<ŒDWWiy"|%9U7hfQl^ S*2hd77s,grDx9q^)sh)SU+MeUmѝ4度m+4hE Y!V ߝ D㑛HLus^m)(%Ck n(&v)R:-1*ZC-iEA0T:ȈmR-/vCmO[c-3kiV̶qM]c: Gxpc, VR♋u+ҢTZ[v*;bDo.זW %̷MX{tY,R|Ov Zj -<;Ens)= z8JP?bIPx8JcO^E{GRbmp⋤+aKvccDd =%wo1 /cH8ނ)^{^8 v>T*vN9iygKWOUvk'XA:z]'֣MQ %y ApW4|KlAsZcaŬZ@)t1|W4O"Oo=0tY_T!5B=`%r Wv؉^6F"tQkF+l!~h-:ʾی0^jVaбly_^u Ix$d_C/e5c\naj55*ft72tX47W+Z5גhuBf|>U% |^#G 1x &q( \ia O-,Gˢ6OF\p(U0gK!>cK܍ik0kZ\7b]O(l),;%׬*BɴDq |ȍ)OOG6Fݰ/l>mRGM+0cٙEv9CgU"m(WC 5^fXqJpdؕRJۀGǔ4M4q2n5."{$5L[›0+>U6m~޺xKKǔ|Ot1*Z[{`-[y~H&$E?Y k1j2].r 3 q^Ac^)J:dDs?bJ»"YC71f_Yf<d 4JlW*f̱JcpNcLX|;r(zo$=1ԷS-9a֭\LD>8lv,,-W`/Yˎ[;J 5 $ K) l9eu/,9ZH/[x>u1)iXZ71Mdo-*?z XDi/ ;=]A#YC4ܞiՃ=l\sY$X>dBn)oɄYއ[{ګk!*/RX8p*TXy)aH`A/"0K |ɒT}j}Ԋ^FDiaKROKlRv$2v]W!1fRrnv0z`7^JKRכZQoJMZ_̀;h&QIRX"'[+'('F)JJ 
5w=N[?Œ/XҋM;|&7>5?/&) p 8Lzb@RgY6y?;\MA7Z Zf}_(tkS뵚g>[訄hVGGiiɲ9]BoW̗¹ \\S !G{fgX^VƪV7{G̚g}c-AfM0&ĹY+^ [-|w|lw~Ì ٽ2 KǫbI%5;/B}{J ig,ڜ1\HKd 4:|>q*v囗lKGnj*6oӉLf3e!SL]5_'f̧~I1 c!< g,b+KͶ0"x?Ȕ)E!B}śJ$,njvNU~vFQSԝT\p# n--KiI4l5$< Gu4lt4lowaڰ͍F!lؐ?v !,ݞhc͐9lH82@ fs暙7:\ DE^e&}?x}_F ,\0'ڟ61Ka_rfE؝^qp^#Kkwjk;;40Ya, }co̫fhdy mM,t0~~$Ɵl$1ZYYlwU>d(UR??_5q ],߮nTG X 3eD8 ,*=3(2|J<-cW“O}IE*_o3}F֙3֋xtz\Uh%\_aS |Jpb}7~Y o{xfU:bDz zOΘ)ASԠtɻ$ZCۗ꫔d{{#Ym^I"8Yܿ0 0 ==9QF2yK1ϭ N9 V ׌DO1_].~ vk]_]Hk#j:p7BjS(~FAF!XZD`/p_$gq­pJqh;_JqC=F(AtU#n@*T7=ʎ0 gp^vR %?!M*َF!>2w28iv(U3rEЮ%ga%/d퀶Q?=㐥ZuNPNj~0a2=+usC<ڪzQ,Qc:gyd0anC1 5c$|KϔbMp -S5xm,(%ϣs) |@hzƄ|HHiFUJƖVVkRg bv]lC6ʃg^V>$3ű\}ufXRqqѤwPI /[C CebBbB`{%LklkUm;'F:4Nq:Ov=g*8S ģoAj4-Pc6Q4]z'S[N<](v폳KE/o`sl :΄ 4YGdKIQaAv5"32 _x݄;::G' E͊!ϫSbJv`9e?/HiPi&-E9Q]-8ē5 fGmCW,j IGm/+trϡzb8vó3Ss+֯hVuq9NBo ƲIަobπ!j>e\͚x7n1FׁO ?QfvBЦi; ~t'Lh1arY5ܦ0Tbiz('&Ns;1mcvP !J%tۜ5b಻5vu1Xѡn0=R,%:YхvÏ ]IFZw%IESc^[cx[hap;<,*cjƅ5|J8Y%Ұ$M ˒ K+9(2y[JveQ7ȘJv!ggGaGZM9h2Y11 QB͸ֈd*Cх>.=;pzdM;[xaE To)7,]J3\"Y ?=9Yv".MB')[Fb7[UT:O,5$.'g!K⪊s˨elk"C#2 Kϱd#BǒAe]tZM\Lp d&.fT\;Sý KK ? m,7BK/3ː/++RE  2)̲ g\ssV%E}dJc{v1Z$=Nz>Bf$p6N\@;bm<zWrMgw b\qΨ8>P^vZi 4T念{)+fFx YnXLD Q9&_؞O[9Jix-5=aƨajc9lbrxaBSq3Շ "׃a;F@ٔڞfchOtuNZwYc.:U9)ΉkBֈ0>t"p#֨>`?=\>^Ԣ ;ՎQQ|]G"ܭR^Bp+d}󺆫9 ղXc(S6 Kyb)ځFedap'd+ àI@>1|Ep Y-`-gVx3Q rS b5a K_񛇌KTBc*n8Y=4fŔDc&6};GL-|^:GQ 6_W̼p&N{ Kw Zqb,w+p'd޲o2r[5<Y؈bC 6HyL('έK-1 \gi Pd3&` f_,M` sNFT/h7 4o{iD8 K sB[tpZ̕~c;Qf@Z PTQ1Ǵk{ѷ iGEUW1u!e §!fY5RJʹxT06=BOrGJZHy+2[8=V 6gzd}ࣽpc%V}SW#wCޭoBBŵ@nEB6SNwRz 8y*K]?Oܮҳ+~sxtLdrz:כSm9K}g+ <Yjxc]7R  TV mkAV?$ީPG M4ŝħԉ8p rJs1k Σ1{$W &? 
O~w,TMձv-Xީt,S7XxCC '+AZVRW]6RqcO;*8y@2,2p rJ]KxYERa )t,Tqk[1N6oV}Cs:!K m oM: *!Mo(ՃȪl-̅AŷqG[xV.!}{1JW'ZKs=a"%A Yj tP3Cq~B^$_Q*BV(Mwt=vґDkns)rbe,r%f^ : dl9j#< YjeuDgO wlP g fs]Oz&`nA'b,CѸwpXsv@] <{"N; h{6wBNij%YiS᎕!ih&1 aGAևe{,p$ "y5EDB?n-Տ azZ$TGBktFBDUQm"],rLOUpdi N#ãجdjqZW-N#NiTF`jqZ j%laFEqڇh&1v< v-1 g _ \%k㓳K`2L&.h K힬>55 δY`嚳[\+\Ԝ`/:d L Aًg鲂``e!צF|4_k;vT|xҦQF ؜1Q˟s{ FwF(Inj]-bēp[a&$d^,[;3g9 R{9':ل  Gd枇*΂Na-ȷNEw|%J#`;AFЊ7N\7̙}%d] };&"| wF7aU7(%+9svʮp7NBK6*r>cr+ik,Wvi= v0%kD1QS}쮵8\x3_~p-筼|'OA>Lft9}x@Q活ΔnùD~'½uV&K }$;!]sQ-ƓhbDr_!BI އ=Ar4`0/a/dzևD*_Bk iMeBآU(yG׎V\-90lq5r}ɛA;G.HM$ K8 yŎ;Pc89˞59gZtB^Whf!g[髿$ͱ; ݿ*\7CެPWЀl8;|rSteĿdWkCr mʎQ>̹ޘfyR)VMfыL01YgrK[ >`GY}nKG?xj}t_nSD"ۯ!u]W3P3fwML~@wD8N\ D/wֺG}ns]4%0ǯk_9~զu5ͯb:M5͵^`!one ee*e V6E?U0dFѥخ-|y"%[Mb?DZu!B8Ĭa!Ey\Thz{JD`ϩS?KE lr9>A~tq'p/d b*Kg/B} sGV|A m&}6I~&~[f5rjPɵLKC?Ff^0`l IQ>VAF6\i"sxe5_K($ +:ʰi,fY w)泂6J5]xb[^S=ʇJ4W9A` HT!7YdbdMr*Axl|@c;JX+|Pـ^s9`%ECwE81d Lw/Q[X4<PQN;`a=FeiAS?a%˥eU <(CeѢD$ @p+l0gLmқA_45q"MtwCMmGY=?d !P^ .^|!5e!_TVxy*x %elCa~D:Afڬ#6s 8yCCnP#QrESxe~['p -ͷUZ^.HMh:Dس:=p25R!{`?!ilC ؜UdFv 7;(9e.ͣ93li] &Y26ˆNקe!+5zpc4į^8ë;>-nF2o ֧!_e7B>T{ 7k#Q_~-l|Jh쏕 e/ܖFA "ce.~~@[Z+s/}מ.>Q~p 3m#zZ R+uKmp3xQV6FP-wg*R%dxm:&>7 K9L1?MMChC<4gH *Wet#H!Iį?873Z:88*8QOmLN0 4i?ai߷^~UI &^ m2 U'ZӌFvљ- _4JP-b5XC_l0Wsspy.rrºfwWl,rƽeś[ԸA(߸ڤF~ƈG͑]- hmgfs_Ɔ [;X;;d| ^nˎWK85Dd^mOyYoaV`rY7N)e1̂?~I$~?سek#͹KuP6ZuW+EE8!r%*W4d Ws_O4`^;8q{;rO?б dU <v;6wd :k01| x>9 f:|| ^ I av-3OJ7 ]xJ5y3^6TsveBw"z gn 4މN̚qmgf9o *B:ϙ4b[Pk Ӊ7"0&IF˯TIަbEd 熠 Z30A^m=Adzͦ&˽V,s✝it4+.>*$ڸq#ȏ :iU ' b3`͚E_p̝uzj\OBU^|>aVN8yBY+Ex\ނu'E V; wh'tk8uJ?G T8#xg`GhCT򛣪LRD 9 ͜4^[8xaK΅m|%N@x ?CcNò`:U$ &F0Z%炒fptżr{=kA^Jb0}; s$knK}%4= > `W?^rb&6r|N7s+(7܅vuDXIj#Ucx8/ٹ4!D.%ECڟ+w6ϯJeəEflDґ Mx %muoܱħhYWmvd͝Zw?bwP|n13(ל *84s[4d7֝uuTuxr >A '+jņik\Y>NOÄc|uF@q_%|0 4j5ЕID:dU61;uqeT%~v կZ~W-n'NTF`jqq;C鸽n`NɣhP]&P1P;n;l$c'b(dUoUT-CqG,yˣ+얘gӍGnټDiV`­8S- a6Oh=+M-6;x%7P16g/ynIպz o#UvjQo}43b.Mb HO]}^ȽM;T6`d|@{2);)t//5*byx ~){T*y>rƴhgs8fy~py|Όp56y;ڲY`x%kJ8f,oXUJ ʼp7y [xQ˹*mX\t=Wv 
K\aTd1A4]p'〫ﱩpQN4=0!T[Y?OD_^|EUލ+_ب5 a7N1o;_NT6pGZN?+h,h={2qۓs'c=nBFmy=):7uK })Ra7}XҼF_Bn1oϰ~޻evg7Â[CGO ~~m>X\i]g'zCe)vg79k/4nΈVF~g;`5,& YȟmS!yCzʄhಮ#`G M$NA+2>1w/-HCڜUϧHΈ|{#9TakQZq'uyAxòY9NG!LeB}~dwVpX"pQ5K`ժYi *Cٕzh}Xn19]+J y6ZZtz -*~++`uޫxmC"snٚn%i2״Ծ ,_C|iA̋LBԙᓧsGN2tмYH:Wl~m7\ؠʬ:2G Zr WÉKVqtp}`LnvΝɻl.|XV?~v|hj^{7jhhŜi.,τ5V{oc# /6:c+ZPaQV oLƦM}_VO/HƹGeԪ"{0ի+a<$| TEI;S6v`bӪ<)QB oJ_)TfZ)7A{bl^DXcTV-Ѥm^:LF ‘Bޛ;O *~?xt54klh&5_^q]k:4՚MA;TcͅZ)2oΰSLM~8atqh#*f#,a9ZNсc + ;A Pj uiu~nkp?d)U?| J(xv/l8D AfD2"1Il" :ysYTȥqISQC ;PldQQCA?](I9GibM욅dXn͢KD8>He/83?fu*{T}(_[YzW+U1(:F=GRi|ô/ZV ABzXO2*7% SO%8ēFN/B.[80΍x]I)Z'{F5aҋ|۲W[<|:; [>T xi쫚?β0VxMxpd!m{{X5nC&gI-)F@iPqGգh +!K/iSMLo@j._Ut K&6d*Y:<-ǐ;7!kT[!Kubj>H qyTYĈ C2: zGi*ѺY*K/"k 7f'5VFB>=S˨hji[w!̬ҸR isujnT] ֤ \qҦ)3!3!D Zہ 2k~ki<>yi9=Exy ݅vll)\'L {3gr fbFJ>T‰ěgL Db,gfxW c,䬖68F@x[1à ͏T'3UC[ oXet uz ChbMSa,c8* gk“hs"xxUOhCi/ǐf uko߅,A 409ēFNCVH[bF62WgmQ ί sݢf%h2S}-= /N#:`T"r >r.ʲg{cĨgf#;+<NJF"4MNLp \FzE<.s6Mŝ^1_Da8 YjâxOށeeg FV6cRE$0 9)<5h3F.z`^zrnc lgm[F'{!K6u<&G #%eGC bxarN76֒w9hЕ䰢5LSH յ<]V膻90c K0Bn\[8W(CgX`̤e-8lkA> =h?E4z!K֋SW8cPL^eMp(=Vq5cs٬uv`£.#wⲒIlGGEק ^Du#pp o,O4blAw.4)oiriZz]]1f,gMfɓNy`E쌻ӋN~e=/./w.U>in"!vARVS)u*GI#4F*ۄaq:'[l^7R xdU߼x&`Ϻ&?0,7Vol;kI RRJ%_^|]ޅ|7S= gTYzTs^T}Ho0RPx}`P : 8gau0{֘(po-nNBַ$("}h#UF,ZT|xҦ5Zh&%0eBL5=F<ɍ! o:nla`rvɷ0,Y2! 1 Y=9| vŇ辖@ѱ@g.~(}7›o*N!UwXN-" Y._eÔwPa5m?6;yCiu(14Qq'qrَ]Zu_h=;nrxy=߅Ĩ;9WRq-wa' `iCŢ宴iǟ58< h x rFKf }Ul! 
CNx) XwLOjbsx By|Ԋn9YQIxJSOO!?Ufzx[% N[u.D#8xQR|u 9s4Id!r5K#?A&tBi{ⱁC<6u*n-p#ddz YxO yzDWyDs?E$v=RoR͔)]6mjMWs(v^';ޙ){^jͫUSh 6iM_XJ]#ZH__*cLP}ujMѤ_)OPFM/2KqJqè4c?xݻQq; PF <Pd@%-KހpGAayv^s/@݇uT,O[a wAR̛xO͛OC>4YB* ,drk^-]2ECR\cq K<8l!_0/qoYa]wA$ 睼=C-h =RW_vCE8pRgg!|Q9Gvd"]JW鷟5VpsTWC,] Դ4n rsgl.w|] !447,uD}J(\NAҧtC+?o?+(m )Q¥(։c` ۚx4^%Z=iۢBޫaPd@Abn,Z ^>3y'oDMr٨).D,37ǬE˙&Qi'aj}--%4쨸sː/+pSd]ElAj{q]+9<0IB+ /F<Ͷ*np?d5F(?<$oy9.` *@hiAŷqjпg{X4o.Z&-HC#l$< aC˾qCs2U4Y,2ʅO'ڴe.T»E\ UWOZ=h6Ɣw&kBQw(.,$d}G:6kl1^/SCNBJ'PR,sg֗`]-WN7@~'`fVmKQ."!?W5.4R33[i[{\gQ^,"M_H/jI}Y%l"vz]-Az+PoM bMŵ[g 곉9`}67 l}M,_><gSq}}6[-EAbl?\G.ORd!*I[jJآ T|xUg|dk+x+@ƷbF;qXu)#ڃ)|3BIrTSx񤭲OCM11EY3bnπgflnk$HBhڟxg ܧ~alؼeq_N=7g-/ܻUr;/Xނ~ٔ=_h럏@ =Y+GW=z'W`p,.NAR~I-_z& env1PXvo}I8]ηgjơԏ9T}+n ]{" I/sNx@zx,\ <^|Ej? YzEl1]hXS_P$6jM(qآx8efQ:uh+?3.2ߞ<{K(;F=]^S.ORϰbk;p/dikT Fj|7(,.nO&{w~ }VvT|^{kTwjt׈18?|bcrka7n*jKuYmd ψzrwh3b~dEջ6 _" n0}0=NWF<}n{:%9+W.Co +t+.[Jsb웾%e_-|5]*lWa _j=Wa 14v^Fp*jP\!OSQ8ē6 nuk^D;Pi]n Et0)4Fz. +{ to@ ^鱨1ұ63l!𐎛e+`ӥ -/Jxu*\B"1ހ/N(G]5f+;i4:wZT-Mj &MeqЈ%D[-S !8H)Cp|FCq-]>x4}:ew൙*U(?S冖CF=菗d셦lV~&v/#4BVh?~MCֹ!CPLŷqրIhC<Ⱥp'̀h 9Y!Kk!G`JuдW<C :,5دaq' ,:e7ip K͔ p5gRf8Ͻh/Cជ˨HNc`|ZK / -x[t܀1ך0L8|f|A87EXe;9GSKe$nc6*?(1ޓע޵$ $Lo'p"ə 9$gxk㰖X$g'@Q7ə6$gDfXL7HrFnIr&OIIΈa`$gl[;U/ə仴$ Irɢ5.l BIΆ -eSe]$g#*vFI6'%xզ7,ġ^m;ƚ$gղmϙǎ\:0\j0~jTKMi!!)2^,Tv|S ~ʞaav4 \ͰzQƎe+mZO,X.`Yށp;Qk}f<ܜ5I]7gD,W I 9g1U_J 8ē:s.uL,OP/OyP#d2͉ %ؼ5~ Mpqdxl$ò8lKbz+,p{81a6\,!2[fcQz)zAPmE^\y0\]<}ϧ%#rP[dkKܩ^Beɚ3q'uQ1q;pkvW BFiJНu|FЃ܎7 1C؜__0Y)S7 <$+~V%ȗ4M.n4,ːQd2]N@P!lzB۴b0<Jz4<7{YF;E@}\+f '؄ SKY^G-}3>,7K`g9sAēMlA1걉8. 
+cf>惚K^WR]/UFթf4\7!L_TIǤL/ q@f#N fΟO̩߀6N;*764TgR`e=u!'ސ~,M_2-W@D Wb'4T^igY%GG1fcMrd?&Asڼ_{֋c\8W6ZuW+EEm?5$ S_ WK2߀BT?O4}D+[4|A*#b 6smpk[igbO3e409N#YKp|Z&ׁdmGrGҎSs/.n-AŷqGYSC`=z9p9\R8p 6Rqk={u2Ld*Rݣ$)5l xi Z~(,F^-fz^v}y}מ.V(맗ClYEGt O@:hRsr d".XL%D @ooOi4{͏ } PopGFL2Ra :ygF a?g&sM]g )[4p8-59s|?Nۢ?WQmzPu⑤swekr(ęz+蕶߀,R]A|ːJoi]A_k^wWP_5}q *@٤ n?eȿFDIW ƂwWP!ipx[svA8@B[,^rP߈A 'k# V:+%HljA'Kzo,}AlCŵo(e[?=6Sxrj| neuц)egyR[ZzI5q~1cX^h]FUs>"<YjQ̾!T/eߕ뛪$bzA)I], $'+;[Eħ[SB\x62,:׳NNlQq&%E}5f5 Jz0e[sLiy(:Fz?H֟Ed+ DEL$NI&HT/+Ghcf`3 Ӭ'SnN ל%˱PCFCd/'"1^,,,fT\8y\Y=genj YPhw10.]҆ӭNGhE)oE [9]kaSnmk,ՄTtl:D2[Q[/#; {%|IT&fț8W/[y])B}Ni(|T+.C3(3Ldw'!Ojӡ+"FoB~3-N߂VH*m(Ҹ9O+]͈pinu:d)3:a_sMmlc1TJ8ē6 }0+I LbCP*oC`;3շ`e\&ߢ+^|MjUu eU˰/ΉCې9oDqa$%Byj.iJrt Kl*ŹY4Y@`U)q ;\zcyc\AR8Xޟ/K)XR? (1ݢoP>hm5XDYLiD ?3 vV`I[0_u2nR/ .:]9ZFA`xhydБiEDU& | T[C6GƬbc&ѼH8yPnwP Y*ہܒf2Lp򐲹 OJ0d /gqLh4!,; Y*Ъ/!Oc9ꛃڄW<Ђ, :+FĘDBl$D j͑]'vIcq"K Ai-- +ZZ-D +^^mܞjSHW;-KLJV-Vzx Vz-]+bZK%<^V]^ho :%ǷO&1JW)@jsNYg &irT.F]8jqr{N"dY5*byx -eS6-DUX|4etcyG{@vfDu!e.jf-—^Y9}evG5g.N^׺q%yTp it9W)֌S(8(TJ+A`\ <0x;Nȍ5cS,\ &ܢ"6ac/@"*[-F mbwIlԚQjE}Sj[e$h~xz,kfX4?fzůq~"<`!ʎQ_PUEK¤a7neReF2,@j" Fxe'{ȴ--YZlsͽ jOĉ 7s%xeǨgg̤ wIU6Hqg6)|v"  Mͤѹ aػZI{`\:"=0 ܡ̦XxoRȰo#O0މ-B.{30xd>dXvz)hVŘIIHL3Zі2mieϖ/z&90im1VsXfY C WC;Vv((;F=n{'{qe9ŜQjcʩu/'/5!&STupd"Ne]?,E.XOa^͒u\g 06 8{z`}R'<;9HvBOtWm.3hfGOJS I[BC0y!y25"'\ySDNʑq eǨI|M gHFϨ>#B&UEv=[!=-Нcԣ.7E$<|usp9Lu7g p f!lPvcQj0 9֢*}Ð[%㫠ҏ#*ݰRaF n(V1sy9V͛Xgv+l} Hr'B_`@MS)!7R > ­v<鶴S(3Zۡ)! 
[o gb?&7/1jG ;1Q U׼앮6#afqV0շ>r膧VYAo&ہ; [ v@o&|Ζ yv\7c]>s$yKMs0ϡE˶U`یTv9U-B4?9uT?i~VaO4?s|M074](~: 0/k_9~զ5/b 5M*X9_a_L4sbM0/k_9~ŦEjku_5n%X ee~ MKʭpL2k*h_Br!;kd}e9ǀ˰/:KX痡B%!f_E,76?ߢpG,xN?v`=jS#:k[喕S|È& BKPP6WmVC3_wzz;~bl o Fml}ؾMq}'fa|&{x`@# /6VuXQ†گ VVX~{`36%Mf@OsO'Ҫ;E`hn | 3,\um03@F+QJbpmj# !w˰JĖV '}bbC\,U9uK],fhnB6Bo&l6Gk؉7/:!yF_htZVCx&Hy xϫ; _f#3h됯7җv'!OJ KvQ6(}g &7imS&DVmZCu)?d/p?ͷ*n;* eJ$F,Y 传nF"YS^,X3 g2[WrLӎ?f)2uW4  #٬0dW,DvOg9ģ;T )nѴ&-?fEt!J.)"wxMeKS\ߖdVCMIZPj acvPܲ~v;$Ҷ sf&Ȅjp?dGrvBvk[a `%WdnxgjwWVCKX*9i7!W~ϳ rz*xQBuͭkQv}y}jE-2YDUdJRUm7XU{ ik FWDǹ-+6ʺ{4@" k+ǨVSC&!ZY^f a%sge#̀hM6Aw퐥fw M`6 ge-}E0*x tG$&$!9D4yLGY&lY/8ҟ{4I*YsMIr]J& hǦ8T5nÝMƊ)ߥׇ.Tvp9l.="kKO7K sua$ٶ.=)SҒ>$duDaӏcrҿKo Bwː/+KʿMn=0Ӡ0+7iP6iP6U$wMDA>͉Umȷ5*txt7@x]e]w=>HGZ)6Lt.[kv겧f)iK:!x%# r" |>V8P `&s'{Il8]3d~3#KY9}ccf;Rs:k/s ^|AVgg2 z:fZtkǝ6'{#4ڼpY+4Pqg'M#9B^&U_WsֵKٙ*B`9`giqM.ʾsrΛ,wprRTyMO<F3袵puq8yH[w(GANB⺴6*C鰭n9zqrjQzU:Gh"Fي*sBIځƊl |o={md=T FVs O܇Yrem^|Eu4 fOoA_`z"{}Im 8|& ,5_U1UrE٧ks+1!wvB>.iQjQzUB_kܔ~9$M$4.ƚic$ʞRO<2.I>`"ib+o4 4YRMJL,$MdۚY)wiI!'&-3crO<W!_U~դɣ`B8y\xd*QI%*$M%'$|9FIlԚQjP>pRq ^0k&NTn\di'yݫRr(9gPgy e?1qC3df)rGuvh c=g8ߓQG,(}3D\jy^.KJ:ȫ7ü'g,UbPn؁gIϷ/s7dd|ÊLt4ryY,*&QKڒZ^RF >Rt7x.SQNT\Rw*2Id:^Q#Jh:rc)EޗWt<ZnWR26c('e7@vMhk˦?k,KTv8;;|xғSQ5  ݜ5 ⎳ޜ}a:'8y^v؎'A{sbBѫEw8ģr0%I{V2|83e^ٵhg/*_V{Z\F~F bĕ(k/PU8HEGtن~aI8*\eܼDMBfe^ ݬ0jm=+ =쳷|~qP_ϺUwEVї:8iDOxry_SPvz".w<î57@ M/2KIX'+CmM#NM[0Ek{ ;"0r{ }C<ڪo:ݣZÛ0۔Lt*hGHOދn BBt+ic߆V\쭩/t])H#MnFVĝ\]Ql;pd\m{v;4w`a|Op7dIFn.+]#[oEhm.ƻ0mΜ鉦.]Cgtq|M]NC6TCӜ//5߃9is|զtM={-6`54ۃ n'I HG{|bG|bڣ?=>1Zۣ?M6G4>̑0E>}n}T0ֺ476z;'4?5n,5.f?02Ǧoi|jHzo1Q;0u-XC~$k{%[MbԨJu>Zu>E,ו OpQ{8ԛ][t}K?hYԦ*Guֶ-+F4H_rN1gWRj sGV|5xmC6gg4_܆מsF%} q,z_Ar_A֫ݵ+!B①Q[C%)(!z7*~#5@ˇ7@uԥ*C<.z8%wp%w>-wvĈG^RJ>Uui%׵V/렋(^+K|Mm{k KgDPyQ5KS-1v\$Ht@m.h"16Í孂%:Δej^:ehʅwS5 H9L@ިRYM cKi2/K'qB#TcMbwjwz%9enOX'<%q.=(G+.Fs۷9mMZuX6gÝKE6}Sr]ǥlq^\dgyVdHSN:0^xZ*9+&ll3tvBzBtw$ /[a͒$E&pSaK6ŏ,Zh4.V{P>a?+qk@ +i #<zqíAU< [r8{򄝸з4#繹Frerze+)NZ_xl[򙚕ºŮ{R8Ċ*+!3sZP1ɾ~]yLjږzEƌ * ateLۥ+=_B EuX,3Ys}@Fz.mȷ빶~W%UnÔ l|٣ˬ; 
uA]WW/dtOA/Qz敧=JhfxE5pg:KIdeMvv`RʷG\(al>7gfη8@f,7HjAt7˩m3NΆxp' BUQαWMHU9|QCO߆v }Xt) QqOw;! >tGj6 CqD(mܖx_!i<@ ؜U5tQm-kT|Sx(@i";8l_$ Ynz8Dap}4">Sskd?0d#ćÉfu族 =,5ȯ)3]#a:"m%ZQ|{ ˹& l#YIl xl@{8'<"G i%񤡓Aȃ:oɶwHMhel6.? 3H!2/J f7T\PiҦz"?#:e 4uiS'~|s#y6]5&ہjWl?ڇPr|ڹB^(?Ŝ]GGWyNROG >i&, +e>6ۧ=r'm16i/d[FB ս]h[xO' xеb:y'OˍN\GugÛNT6d )Ù{/GWNV1uٴY'1I^A%,DicԳgd[A{(PmaJ؄l@8yRve",I&V])6xPp򰺒%_v\={tA]kէʜ5i9DzmNfF&R-Lfpfe\]3 +Sr-" V.͙YL4'[ k~6gE!OO4G ؜hSF@7CZ=]ѧ`!mП8CPŜ8y@ٔ229<Y*oDi8Y*HLx %eͬ,됥N}5h8^-[ z&oC^SpD;*໐UUH!6!ے=ԗ/(Gʞ^tkQIײS4񤡯);]A&NTnB~~,Rk[{BE[U+gڈvOŘɢjs5G=YDl),ځT'vpxO{ xh"pd~C>u!Km?iPqG ?oN,,Xa ;$vHN0FT\;;`Dv_FcÔQq'FGj(YjޥII>3CT>{8lyS zM[ oN;MC(U+ rz'ZTjO;-IgǫSyjͫU4c&2cCb( dI Ex:OqC%i3(C[iGɥm޾myӫ#K$i^T`DS1<: lhvf/.nȻvRzd^AiGi Q: hcQby~:YHknS鐋0h­SX(F/6 {whVY*Yżj ɪ>DN!k[Y5Q:d}R۱M FbNI;Qv26B!BQRIQq xIeIyu145+u x!u)I,2TcCt!$7Hb0G 1y+ 8,ก\prA؞Nyv.#@D%6-9Yhp!~$ܑf=iNvu.To>Q8Ycr aJy@}yW>#&LՇyZM(Nd.ObtٷT-d}{-xrٖKe'!K|Z7Z+PQmIj F=!o,X}Y$89M: K*Z5ݐ9*c ÝѬ*ZyXQ=juOk^^h5bDŷq(ߤɄ7Y2'1QYfe5#g=uc/:(_x%1be~jG,!wxV)A-96¥.ѩ] bx O*d𺎭mzIԗs7ߗQq1cEs ф݁+|-|2sNXBц ;Ye+p5[]JkE[`mw!UWO/o>&S@>mn}8hcH.U@uw@[m?_wwAwO<Aa)lv@VL/AO@7Z]w}^Fg!K]s/ sz%g)akC->,ɀO*.aOL_!=:z *=`|FicO{ Uƈ)5,C½FA<ھm׈kktʚ)/-mAju Gd/@!_TV,|6ڄy8hSio5ǘ<Q<[\ƴTn8MD:{!%г2Ar λU6q*Dm/d)ӄD2P)O'!O*ks|M߄f |>x[ D=C<4±ƴp3CEMC,ClaZ 6 N]/le_Kl[|gF"kjWv;q|v`+/4_bK?>3Ro=[|z mJ;^R0T\;pkǫxB;Ez>OZz@"R#ck9З8uS|v`+;_*/|~`+;_*)zCC5،+}8uSlT\;͗ |u/le ͷn7Ro˛ԩx!JG/<+%իF(]Yj4~2yྗun+ jesqt78;aӁtzGF3 >Ξf'k-T%W_c-0Eza?cԳnkS|~; w(9/2ѝLq $<Y>qE$ʎQ"Umg3jxzPdO4OάR?! 
KM42u7& )`2l)MD O?-wX)]#k2)+p` .y*v\ﳌ; ރ;4G/{{V^N(k5I#~ ݥ.$r}ρ)!e֗_qձy0' &WSiX„$ [eǨ~h @l+9:_M 234} '̔h8 „g i ʎQco"Ec"M[GCޯ4 , O@^kGǔbGkk7ȿcc;p2C-s2^;өLQB o|SXഌ-ccGa3tAFwlDUK/۫hjlo |ˣa]08ɲw\Ѹf\%a ?kކ~`V츱 ,,̸]01(<\?URK\$NaȇnH38_ӐO+s?"i]SOiΤ |[zaҹ!N#^8#W+ _6d^Q>D?릟~o_Cڷ8Tr #3/F^[z0] SgO6ϝ9yʗ KмYfkD~?jPjz}vNOKAbi*utF>,kܙ[ϐ8*1$mA$-\%~s(re'fa|&1{x[XWuhYl°ZdѷF?c_Hs5}ri_Lϊhd>kVp2cኣsGFL/Q$qzs: vFe8Xz%률KK#T|7xZr]n래۬puӖ*CQy'nAwQ7-Q9#6٘uPAxekԮѸ#͊,ڡ$:9$ 왳 %jiHV:U:Fm0kC #!y J'X k ^)ۃU(귲Yfbh C3qSNARт:::,?Ms}7BI "2灗!_Y󥹄bO@î b8-jVDexuJ4|.xYAJa%JmrtrŶ0|{8ߔhp5r諃_G*!E :9TV% _l<j@pxz퓂]|}5/OSi!K sjI8Ӎ&Ex\|WnaiYQmCLZfY 9󦗍6PNhH ez ,D_'jF!ހ|Ce,ZAώ(zX&&%=I1fZblItE_^3`Jh.+(+q-=w&' wX!qkg<0C0c5?ܯ\G± ?!Mo/a ŲVNPmMAUA=H&̙5'$GCXvC6hڜnéDK\ DK,z0:r~LېPe3N!YQ"r'7!T&:%m,a5 V҂66F4& C3؉ s>OR?FIiIݜ >鼓W9P!Vmv/j\ Uɼg>.rf8֧?f3+h'U.߀:nr^(%jl9ۗwlxbN^|QY=4)9Oi)nDfx ('>LoWn$,ݴIS/N۹"B /AXp aBm 봅6ͱPt{ 4?ځ oRVt5Agq.K(j38d}W O3񤡦Qȣjڄ ֌O]B9'W!_է6tC@쇊`Fľx@8'rYmȷuGb= lk`"-l2+C]㐥:nQaayDÇkR^s1bF|{\ IhYYB5-2!44 yFNtY"e0mJhe#X>=o bj</Sz=%Ĥ c6Lŵ7-,譧#Qa{%b' K-f uآ=*C$^' SI4 ۦ7vOC4<YʕT_c`#A.5 ` ܴYCkb lN-hTMķl]犩vbƭ1ך-LK'ܸcȏkQ{OÝ͛~SD|+PoL1ځay^sG'ܘ/G2INlg!jF--58ē6˨=7:`}Z,],l,*8yHbO-Y թz,(D:1 甙_hΗ=Ċ~9h{V:3NfpqI/M'_`?:Q?WG.D-5,Y\(s@~c0EIŵy10Lj(}x C>d&۸OhBt(V')lin`sb(͖hHCO@Ni&. KP ܫ%Lq+Է( v8{ʢTu\%Jg&Ĩ: [9;7ݵw-L,IގZN yhFT 4ب8<Y.'ip|CƇFMoP:[' aSxnk.;FuIr]Jx{{xF%oD}|3BIrT>.F!7[ycr>/2T*Y+tRa ]ג,8fW ^xMp' {l*:EyAVo !*ʿ oum.tH>*vFI6'%bo/LܻFnY]jX6񤭳[?AOҸ%^7,F) =]f Mq+i+{pH*<|t9`+X[dCqXe=TM9In߂й4q-DwdU,mokY3*nwZP߃^~)ZKzi FRV,Sŷh<@(gKM; PK%wl,z U{ӈ CE̼MT;2 ,azTJzJ?-8Y˕S!i)O@>Yv&|͉6*tx E+~|HsL'}_C16gxfΟ/28zR[FϞy{!aKI|ߜ|p5&mn͓ L(u8d~b$ ȥ45{5ݥJHކ,w5&F=Ȏ>6t]ea%.RxQn3CSe.2{~"Y0"td]O(#SB|вH+NWrvPE= q Yʤ< f{9u&kj7Cnepp8"7^5*m8 #< 2ӝ5.}pӅ-wB w4?[~胇PNOxOoO*ӅKgr>bnέ,WF*pvt<3V!pUF_1W9=/*Oj8_˽p--_*#~W`Oi})GN*? 
:![ 8CФgP3_ 8gVNQ9lgLbQ^|ۥU=5i磴9g~[lY}oyB.\Z[nt|x`[E+OoT-X* o6ϡ_"7T= Ԧ?C!TjQ`ݩFgZtB>i z!QASnvuOrPh1?T OP)G܈2U~ {dQ6Ĝ|h 3gCE~;X}v']sq*gAwwhgxGI#'Gl VߩBM9^} ؾWmF6Qi ؾj%lဍal߃f[Ts&H{Artm!zv7/Z qߋ/,E6|i$-jPD,JWhڂa +azST`jS_8]=WȞZ0 c䒺'89LkcȯN KXs׽`L_3!ufhk_Y׻`ُ~,2(];{n|yYjgc٫ieWX%C؂Ur"(aL0ڤ q Lo҅JLm凡VNP9tidL!c~Ĉr,K"ԪGPE1hoFu\A=sTk( @^#m?v*\g4{[ Y}cJJeȅ%+ʔB"J_): ~ #LoJSR1 )vEA1໐UnhzA8*pd^ۡp]576C>G{v3D4 #'A'a-ҢYxnm?j?(6x='7A<^)Ag@g<Ůn0BGg $~~V+M$~l[Knjh(<ڀ8m7`6SZbuCxQFM}s 4k.Bd֒qKIj/1kZ/om !ws՚+ֶ.B}_aʋdPgyF+=ȕ g|yky(b5/6.ޝ53,Yto{`^y;xN/8gV&RkU1:/| A.^jS b5Nd7ſ{Z_L ,7 Ė1،]ł2ls6uޜ]":@x&ơ 4!Q j.#G`J8 yT{`e D/82D:|DU> -oT~C;Տ&J?N~f zj/ T ~K;]6VZ2*ISv)QOsdEp{Mo({z {UBc\_S m 5yoSG rɣNWg:%p~O;YniTIYIY_k'{'{'{Zh%C!l^h?BѯqaZKi30U*m#0?Z [JO4%IjS+4&I\WFtXx4D«,Vk;5B:]i[d4CP5 Y}r6:cOOw rr1Hʳ>g"xr3J%,=k#Zn7 Ԗ5 PEjسwec ʍp^xM%7bQZ'P! y&ZP]*vZUcPUIZ)iR:-/Ptzp_'= !7'8ԻbyuQ C J3B_o͌.*ɬ +|G_ v'y' 87N+)j嫥a z)_6K6ӛ6S7P+a zhf0O( $B--D`;pd}]Y; 7Bjˈ& q#\7c|0G-V1S㟢E˶US?Yn3tƉ0?5t7?ESNC6jheE,]3㟵4 gŦg0?ki_uyQs{s=&HGs6Iݵ0G_h 0Rus n/4h\율e%p3dbƿDhmh jr/ai܅,nIMD`j꒭&1+X_F -~C: j!TrbWHݐRS .*~x4={Zfsw`=MUdm[V Yj/AA9JDBDd?x*`ŷ[^ېMF} o7lMK7}X^G߲rjPɵLKC?Ff^0`l O̙|_wzĹx10VkZ]?ՍQrhgŠ|ru'fa|&1{xXWu(, R+P^dշV?c_HqU9/XtG|hfy'W*V@r2az8#AC|pܿڈb? 
7PJgV Bwq !'T-u`^Bq)WJ9ģI#FYfRl q3)e-TAŨ dT C¦kh7OK+ihVsOZᎶR+롉jecϜ/D" Y * 0?-g9ģI1G?4Dgm 턆!K%~^ @<(i?!ImxY:(VH؜y73,L/qvہ\r-r'SN&I͍z#Sg".'hyX*a%)eX2k, JzPvZkba5 ,7(@cmŒ>$ iPH|Г8wӊ|62y}:$N4Nٴмp+|دf- ڙ0n?^W䲞E ^CޱrI:]lsf1F'm=x<)Nm76tFR| *{*HM5wh: x,_N8"A l}՝!O t@t}AuA4ZiEᄭ4KN4vɾeآeOV`&5l>20U;iN9z-Mں栒FHҡڙ 5տ*J'qbQ '3o=ڸoegDڙ ݙ`0я{ O)hhgqqy7D64e-s.zא*Nm߱9'/:Md/Cy}b͗= zZ,f،㊚Qށ|GR0%ooRyc|BWvT|xҦE (5r諃AؐpR#Pw 5S`YNcu=\ 3 }ilE E+1vQB+ 2SJ5o:MyV oh򱗍ߨ-1ӝ<A5d`o>|A:{CcrƼcen}kri#[0 eˊ2A-0-k]: v>0=N?bBe1a-Ek֬, {+$"T|GsflAY(,=~+8 '_cf1~^z)tޙEIW* N.XEb_NBLOLsr{58w<1I3Ft6Bַgl:+PZ6Dس |x1L ZS$2uE#f"x揧8J͈O4΢blx=;D8 Ԉ;$$W'IJvB+pT=cr+o?<ߜ "(z>Er}zg5T^i?JowJAX{o#1 _$۟سex  Oits{ӐZ٤jU]߄uhWs߂[ WK2M߆DTp'm#u4i1=LQS\^`rF6%2~JJ|P߼8yPc$LQqm,䬲_b.| ssN<_6F,oB~SY)hy&K~&9Ý; )Ct F2E-͘C 'W [igbO CΧH;c߆Iuj~ۤg ([,_V%2=XQ ǘ"{eo!*fkJrq$OA>V;R|8y@K;߸xiIt?!mJI?:Ǚ(7{vzTbē6q@͙?ꨜwɈf{ H8kA<`}8ge.$^[}Wq`T,)MRqC z+V`80G$]0*Vߜ5h_\MƲyYQ!^=:o1áBc?֚1 \Ls~ٵDH^nLvYȕ Q83^ Xj ^hGۡZf]=*40s @~WC#{A*xҦ-p73nNzArd@Ŝ5zjHH<6׺(J4ەRqG!Uݙ:$*Z: 6-i` 5 f:|ʊZ*J[+s9Kpx1SgY=B\*l+5)nNq4W7/ԈMKF%lMQq瀜h lZجumȷ锃(4A:ȧ'x4HNfț2Ϭ vud UQc"[W!k0n瞡z쒸!:[ oi9Qqk\KE\p-hyf0x+qRУTl*# |{I(ivBl.`SΜ + tṪ:[4ݼ>\8贜gc'yuoYC`ELشno"L{TQώm5[ 2 67 MzdⱗCRG]2Ȭΐ,Q\'' s[Ӿk EKGFD0 2ѭa_J+zBriu42&"x[ ˵+lm{:C#FhPW'E uX0CZ +I/--C'{2 sΎ(S6-%$>ZRAR%Ѧ$J,Z)'=RhmpwÓd2dLϹw{sgl|o+loE4"A|oU}uvW;Qd|y3y|?ʬ'gD_3\C.j˭.rR#12}C_L''u+Y o5YU.Y*o^A5»''Q Ղ}y+ > d}ew%?Nq5.ErwO9-|?E.VI4q6^%VjvO{vCw< d{dE$؜MYPjLݴDNuJNBh4/! 
x姭4l`H$CL 2wx \^F.ysyQ'iO~[}j+y7B;4.&6dn5\j^dP,S$-ssLJRŚۗ}iΙwh{cu\,h"AּKXhmqA>YY朝 =vz=M$|\땸 1PNբŠM;k ^J蕢$(FɠR"Bb˜Y}(g9ēW 6WlǕob az}QXиl/E6YI<~\[};I9 |\j XW@NGGݻ6PxPI9jG-KPY&-s2ߋ`w|01*\7IE(k~EQhJ}T:"kRڬZ=#&P 2shj6wbV^}#I#vNCW5(]}P8t\S8ʅc&Q'jhE(>\zMqa3la۴t^xI HT'px \;p7z.^^h3GT?U~/A~!ے;G%NQ wKu="I .nɕA`$t:ݠhjx< ~Xh'PDkʢ6,ÄC=#_Pi@+4L_Եc` .\ n9$_ȘSHceLx< 1pE}Ϳ aE=fo)+Xu#.V7N ;:oP~tAuk7LP~;TͼFm?о]⻱Y7qBdJ|ڂ9kѾzÓ?"%䮧QLȸㆾNQU'nD5te%B| mWxvO8~袛ѻ"5 POP7LGŅφC x)eGGc{{d/m?f#$ax\~Qk 5/#G/K-I/+lȈҖuSiAOM-6FEuܰY//,M#w()nj^%'"*%A= Ni__0qF;8ITYm BK+/3 .pQ.SY' l ņ{hD]tW̃8~NĻ̾RM=jg3Wh65y/Wv_^cJwDS(%~Y=or ۷\{ڎ&WW;_|B}S6k06g݂iE$ eVwEϢ?{ oe(%nf׍ K0-_@7 ]nVR .w\;p+VqL<MY&ZʊP8 >قYٿ ~7+3la%*$IjvNC^MKwwoRfp \jnX{%3la%ؔ+$uSdKw<W-;;_[zkʦPR LqS(k*^yN.pZ+/ |d\lN5L:)V^2le%;_Kjv[Yy+T߈}a +o,AWx̵#dp/ޖ_RA /ٷڜc_=MDO l'g@׼Ž]։j|9AHaoaYK+K*>T#p?&hd͢@wjw?8).9)9SCG;+%<.uQ"33!/aBa/::GJ:߃a;A=>y+~jr\Sn^ݽIS,>c7PPE\}~-cwBw^v;e%]9hNyx_|H%RXЙTCV`;A=D&\GVXУ06nJNϏ _ HdϠe+$pWϡp|>ǰeI3d7SQG_L-s jh2m| x*6_5˳ ]w*s逭-*lDw0Gi*-^%^ h&$d *766fK6JJ_vzم8,{|/p2 ^||:j񀰨 !]]|f"򻨭X=g+hx*6 [NB,qU^rHJo]?e?bzeކiln6HBM}ű5(Y`PTj/)yˏ[PCfoa̵5nMn3N7H? Է]M8d. 
Soܑp3GpǏP#ekk~r!Z5?;~dܗ&j|n\5K0 jq1VpǏu͏5?;~Tl>J j ='OT{O?O6GmW5?;l~ whq)ƧFpO6j|j4G_ j YVgpu5?;~T\TJ%wцsx(Nͥ;?G~iS:9sᑄ;R+\d~x4RE7E+dCq̗ `r[le.Pc%9d>CQً-3iZ ,_uzĵ2`RIlͲ+g_/ۗUq7 :R2M1.$Aoݙx1t?u,5:†-Ů#/8E{hnx])E Gw\sr\ql()Z8ڐ_ HɋudѥnnY FbUefW7oJPiuP \*wZ];e (ȊU١T*d~3x4%&R"kT֠$[*$u16m?˶*^B"<~LcNX׸%]%a {z8TqVQ9 '((fj#nKΘl6;sg[֡ o^y6Ա\2{W7ۡ *b%fUvJd0H8#Y7XCNZ'rXtKc,tvm5[ ~ ۥЦW:/l0a ZXUZ W24a$X|S.ԑ.zZ s$,CRI.-YAIn֩prNv]I[&@J cѰ@PVځK7]Ki 99TV5h6R =PNr預QQioE˝>| veS웷ρӘ7 Nypgqsͨ `̲)WKI={ڮEAּ;kg;";mguCʷJ19P _WNp]q.(*8>m[e⬒loۥ .9!(pLꅟ,=9gB`mw&Wl-A8<~@ۀz-Ez(ILp\j]H!%s=,xV22e2V&ICJNςM@##M%)ypN^tFZd]o_t'') ;wԫpLˍ1xn6~\z4,EC br^RrxmJN ΐ vKյEO Ӓ-Z%mI[N@jSezm\Y=hzFpɩONM柚 r028Xt vn!W@љz̠N`0N,7oGΟ]B.8!-[MK؎jj\y _jSi#\J}[Z3S5,dpMojgه ;-[D&h, d]foCU * Uq3'Yd,+EG2hyn؜Z|~lԶ`8FE\iZ,pv8;MQA\pz?4WDZĆ/_XnT,%I?-+y7^Y; ~Yٗ7۩8mEWI8y5fGB=̾BL2"xҖ 0m4z,; (6ݷ66a4NʶoV£h*j&0i $8>մy#r;*W.sb$Ƙ ~dnɻDcir2=Yr2~/DC؅'OÜ toIT<0$Gׄj&@\Qp Upqxxz9֏\ךc͞M9v,or !Qr\~ 7g!IpK?d(LY|s6x\n*GG}) D{Q¨ϻ7 Q3^l&E[|xҖJsc qQP(F6ln6m-[ŤKa$0;107+=n+ ] ⳫbK:q5g6pcD^ \ Cȁ!*9Q0hl9k0a.Jp2y9Fn k͹qʹ:NO]O5p싪2(q9|Y夜A_q,Xs^AǑ'Z>$s4aG6I|Y˾yx\X7JNUuU{)p͞ԜdwVܜd ^>Cnmpsfp}fu2a말 ɍ;.u;Y/a㮨NDV>u4ŢahžDbΒ<,7Z8u=;T}I{zժ.\~g\tr΁m~;{5p\=zM3d)Od^bF{NAbٶZFCnB;w6Mz wZx6$b$[ixie+0jϲŷxѨӖI B2ݑpWC/sGS;,,Ա>k'mwTשzB6w7 MA=OdšJPǁΊ5  ;d+FBYMI ,Mi9 <.N!sYYpё"?dRmXhaʫ[1Է$%62Tq_\2|{QPF?5'Rvxzšӆњn է˪^̓7ΐ~-[-hgmsϥWP_ž*vr.6XTçJMQ5 dg&XGGSsNO(d_-YJK朼-\ÄW``]g M<7=6^`+c8_Jop1.S˵z0JhǣFȡ njsǢ>HBP? V8C7m5i,ls)mY1HрQ2 C'/\H3fbbQ27T.*&2C<2 7oȊ fImӵUZB BSYiX l{2!MvcwIomǑzgC<_FvFvf7j57oVvӍs"`$߲7t7hi- pkK &.)ab\=W3RS(kR#an8b{)e^Ox4r`K7*;M =ɒBkVV$(^¬za>)~9ճHMpEiT ?Jz@'߫v9bIZ;02P_ `ِ-RW,7$f\0rŲW.nk .Lbπ{߿q*o9a=r\I8TWcl؅Jv_[Fp'NLjL#{8ēv݆$y`;6&֛폘1a ɶ'p" JhkFIe.'})dtHҹdI7 ɍŘƚ%zT'⎚1jg3s] !֮Nٷpru-p%E&7hP|J V2 <~ ? 
<.b%]3ܤ/kG wį࡫G#rOO빎1 <~>3j3joBR,F%rH[IIm.:|t\W)53C}cuC&?lBuug)$ׯ\]) kToeP]K>Ī+?܂?(\)yp}OQI=<.0/1pmE%pSR]ʽ؟R6 Uq |*^p $+VڵL-Ֆq;0ƱՖ=hABAAnbIy%Uh׌b O/_O'SX%sCgWh:?n8+9\ZßHd?.5B<v ?dIo| uΨ/y7 nS[FEg$rp7-Nuy+U2)v'dxШ)=7=͉|H9Q=nP= | \*Ko>|\j "շz ֒Ioy!vn7䬢pG+~3.IZ/ F%߯7n׹ A}$['+Ð~`rS&h(墣1pB)-^eI2OGjUM.: ~1=ނhd2x4~3Şnnt2!mWױ| ~tx9FsJDrE+vQ3c\Nnl#8ēvMl/ ӯjqBqՎŚ!ܝhّG+&]4ʜ0 R@ȝ6ULͳ,*/bJ/ӋeߞsJP ,t5s])H!w.S1=#X~ mOw(y'X$Cd09/N0v |ttf o2obtd)K/O;O.y՛l]^ᚍmUZ g7_@.i_Dd^/JڼﰋR,YK}Uq9)6>blticZk6T ;N"7oHtzkg-r@EA;md QztACz59Qwڏp΋&F"Ŋ;2]!pxAa< G(gƴY|B.MS(hu޽ձT)JS.E\zX̷qآH9svXE't$5|#FIqT5.ATj+o7œsAr[f- &Q6;A?cҿݾ~aHVvzý8vbќ\vU+sEvh7 ^ՌB_n/%g p/oYHWT^s&3]z%oΎ#VCK;mąJ8?cRccO/y]o|"GS_j;W9U_$]໔t~R4"t߯;¹h]헠`;A=S&G6%^AEH0yj_jBVگC׵VZ-}7 P_+,K  +Yl ʯvz^s.n /i BϏȄ*^m0)6).JJWT̞ͧ=¢~BE=]s VgLkn&QIhheR%\7 0i$5ۖ%g jΌXLpߏr^ M$< .Zn [{ ̗ 9eڰu(ٷx*8ո]휅kM$P_gzVh\_͂K;I"Rֳ⼵XʋpjF k3Sы~_ - \Vm ?a=/rF_N1d\P9=}? D=t?N 焵.j%oCϷx-i߇~B|uͮY6c:?0tv¯1|x$dl?&+HP>=ݷn,,&|" .u!ٶS˅OPqsA\jHZS2 UoY!OK͍?kl;ڂS m_P;pGAh;p˭165߁;c\-wwP5ށ[nQkJxn.Vpwuw5߅;TlVP{T{-߃Ogڣ|hm|=o>ܑ0f}FQ77Rm6߇;o|U}=z`m`K?H5?;~jApkm4?7>x=Cx㇨B3?D^ŐXyQ!VVխQ1&[AZ6Ac[ݸ>Ud\Z#7/_=#^r$T dҠra˫6 ! X|G ߤO,=k@?뢟tOo ROyH45ceߙB{0,'  _8'NΜ>qdK;X}uML_E_|Y}}d>|zG 3{|{wBŬpM[7n߬M\KMgk^ k~پQW$ב21oip_% }ċѠn־c,JJa bwYG47 TxI r-T䁷ر-> m=PRp޵!ϻzj5:3 a7xڪ27_(dqPQ\.;uUjjd*P*2C$ͅbQS$Ĝ?qfʳ ̞^\s.z!V$e x$5?oS 6KlT.tpfo|7xҖeb7RX i_/ݜ/ͧ%lP[ՠH-NH(<XVۺ=>\D veS웷ρӘ7 ^tb~v %̽e%̭h,U`.l[x4M$W߼wziER:75@eWwdAp~pE\9o嶘jK&Ck"b˳[xf;WjsnNJ; !Xuqf8ģ)S ulGwZD $&]uآr(:7k442…Mc AEvG*c=Iӊ 'r.uOdn?(۴tPҘV;P{eN5~ T҃V:a4i7pTԓ,l4B5sjG]pa'%'_?^3Enа_xS;+\E MCmۚ@d |rو';;eGŚB}m߈yϭ6ux\%c墕0kaܽ+>s19<~>;,;5[)MlubVC*XF:z!S=ȝ9jTKׂ!ɶrŇHk~`?ܚ \J>%| >۫ϙP ^)uȣ`ZƳz+]q%ga//mfarm Cߙ/ہxAK포kușCː6hʀSc7 f}?$'(|VU:T1jx$2cnj6,QϴWVU&fð Bj3 Z~D? qf)5Alj%lG5A5`T |u%v{ev-وi#ܩJ}[ڗ3S50dpMojVͲ5 |mewZy/D^G!5jV1h ?Q} 1xҖяHPmRP;/:~;P%L;ӵ[t17繡ٞ[\_%}I5{v_'Ys"<+J`MO嚶_Rk޴hB! 
/Z: /(;8WECۦz\ϵ{k <~%aQ+*?-vr +N_|jӗFV1Y#~ŗ;b|KuKOE6oIPhzbE{Y55zT=c-^c-ՒԬ[IA`[%նa+ii-!N0&gFr/5uj;OBIcQۤZNj;ɲ*ԇ?d xXv'XèN,I/kF6IJ2N#M ys]Kte7^7^z :'Bp(=0E ]~yPUߵ'F\A]좟^'9 y[ᆼN4;~a>~>1M?ܟ41'εp_nd?Y\)?.zE߼>4} mI~` Y?[\^??mYdplyr/build/dplyr.pdf0000644000176200001440000125301614525507076014353 0ustar liggesusers%PDF-1.5 % 2 0 obj << /Type /ObjStm /N 100 /First 821 /Length 1274 /Filter /FlateDecode >> stream xڝWr:Wh,BTJޥ7 hfHx?fla$lBqtV7Yb&%,Q,eK#u0g\1J0&f3i҃LilJ\XaH&# . x)a 3@ cR& HD\ubAdqd8LA PW)SI4b7N< 5@k%p/ItRfgbx&^ Rư%\5GR+\&9H0!vc#DTJbBLPcK'%&@Љ>pGH@ނ&D@*bΡ \T8) ud$T(0$ 8PIoL4BӞ"GIarCK*E ǴrXFd ]!&$bR ȚtLh2ѳBH" 09%N ᒊ:S$sb(S7xO NwLbA2%iPRÏ?-}=57Y޷pÿ?_]E 9 = - <|F۩z?^;Ч{և>k֋ۻa^64iilgy=yQ'F hx6q̚b6Qhcr\3.^[gnvtPv|ڲΣƃW>;߃rۃ%NU򋉾}%5&lW)1lȝ Jnz1p!dgಧHfrEoׇ:cyKyļ4;5;$cw[mM{VRo Lr-}8[śyƲG/Bua]Nȝ ۓ%@]m+|*[p'j>:}Ȇ˜PiUYwf);;庞 Rv۩ Hw^g@#k벋:>eg9ݞ/O݆@ۆnr}CEoS;PuT ^ӯ n4Q]_D endstream endobj 265 0 obj << /Length 884 /Filter /FlateDecode >> stream xڕV]6}_:cl'NJ3]MŬZ[0ʼnmfſ;0&̾|={aP0|/'(7/r{Zhҙ+ }WU;$M1qGx0B^Eƹ֪W=?yߗywkEkjDž߶ ~ lSgW;Y!E痹l[;f+dF+G`/?lK1dX>^~_*\aa=. endstream endobj 310 0 obj << /Length 1115 /Filter /FlateDecode >> stream xZ[F~Heαy> @s\/. 
i2k38lWJޜ[cy?V?i- bT^kU#7zJJmRd~hRF|Vh/զ@#CB9 u%wE3-ǚ~汒jXeJu B^"o{j5CZc8;;竏\4fLtϷ1ʕœ)QnGWw;ɍ'v"J`}wj2 *sCZ(>W_xs-U頎(<_yb/ I2P׎>2>w&8{Ehoλz Ck:&q97DMGP(rږ2 N r=: v.#?7]%ġ,/[qHTe=D!?pD~1E #  Mvbdd} {1S/ӭMcO^jdi>t<92f:Ч.n{O~IDa!Ff ڡq1~@<= >w endstream endobj 378 0 obj << /Length 1645 /Filter /FlateDecode >> stream x[r6+8^I3!7iIvMܕd3C!^Eڴ O%wIxsϽW vqŒb2P#hp.IZWMvGjs,1S !1BL[):̓eh@P`/3/FQPG{ #<ۓP$,6i5 "ST߫l`(Aw셓 dm!bn~I#[Jݴ+㚖!G407Y70aErȡ򼜚&:kڬL[!#9)׺^]CJ=g!vVש.U01ԉZ%|\>UzS'pq>: Qyv{*8> Q e*TTDnT,"[:1'v.kL䎼FO{8V宸5>f}$&zmv`ɨNBht^8nSj ZyOFEٴɳ| >BxM}"U+J;Y|h!ksl@Gqn:v 7)` $<Gn3Slbܚ|P LE֚+[褹%meK wWtmY}:l6$ֶ#dMi;x sdvo/+mj7 ~!~;OCElP6 !ЮH6KH&3t)$ P(;_'mIs 9U">{ f"0W\V |~R_Br]d12TƋZ[[ !)?Rvs~5 60 ^k&Yޠ!0x*)E{JeVBt$l=ÄN :!":YzbZ<,QD1;Dޭ6eOt/xT 3sGyE= }f2F ()Jwm^4>t S|~8G&; endstream endobj 203 0 obj << /Type /ObjStm /N 100 /First 870 /Length 2271 /Filter /FlateDecode >> stream xڽZ[o~ׯc%9^iZpr:~eT8Jr͊ޕ,\Iig2ʙb*8Q9`zMcR.a͊V99R1渨|ʑQl0 #+ΠK \1 3Z7sTd)}6*EB̀T}V /\l+coY6! HSKd̐ DX6\dWB:~{qIdD#xA &'(τ %We2jR OXQa>pvG| iedžipVl 8+beUR̲ c(Hsi8Ix!ǞWa_T(3ABÎN6]KVx]"HB |AXc<^ (e0p~#H /V%;B5%$}U);1g BQIVA MdY#EP@4g9OQ$ce.F 8;:R~VK+Yn}~aŀZ>Z}.@X䱘o.|,_q#+y<_g_cÛ\JnRrx (:}`Dz\.Wb${2vg۳>- gy{Ԏv7h"W5J7WWg Wؾl7>L7e\],>j׃U\ ':VݫTͺb^e;^l7bۇ˳ՉPZPV|: H ODB.c?R?!ϺכŦgaN_OXz*;ubk>E% 9iyonw\{qts^w{=k./?mnybYo?4"€S/wݜ !?e#j#"3B>&o[n7C}">FҐZ\^N͵'{a04^>X;cYu7yJuow+gD|nN3s{|p?rCcSLp?sLS TB/zK^.r \B/z˅^.d}іѕQWuS;S`DdRIMMɼPk7SRa9j4bdiĐX A-r@` OC=vZ2C01O 2H!C =u]̘bQהjrp19XQW0Ps vǀD`bO;` IrlM@u 59؃`4?ҚhxHU0M"\{B{$T0eY_LSb'q9UkoHs_>!ǐQHu59BQHu*{(QE"yL``Dz">bYȁΡ}{9FnN}y p=T'rW{s:d ,=aSL. 
Jll=]}W2۷m'ۏ乌eXS\S\S\S\qDžz\qDž^iURSSS(B PB QXR,JJbYđ-?CJl@X5iLyB@aXKy#'hv *_h~2:i}EQNJ6] B3 *Xib`x0=؊ CN;ŀBDsw`xOCDjrhQFpVul訙-Š9x?:j.b890jtP к#+`GګNǃK'kN}=7YWCx;Ȉ2xRpI"Wo o*~:?BJS<(ӊ endstream endobj 425 0 obj << /Length 2544 /Filter /FlateDecode >> stream xڭYm~ȗ'MҡimwHZwAd[rEpHYuL9>_W/oW15׫*}&^gJC}{Di1S{cC>4>vǂFM0c>giuhnە}1*cPۇhWJ l`q@Il‡c%Pw\3m|om^4%#Xs'WYq4SǢUoPH"{4!Oۗ6St/~ g?UX6ζNfWG{^4)^Kvc3T{;qxbv m1,VJa^Ľ1_{ƧKj۝<\oVǤ-RpgLgp-3.[zBo*XήW<to;hr\EL=7MISV8vm^Y8fOw &)º2ܳ=+ӶŕO~d4ʓZΎSAXRrTHſۃ+. 8=;'tGΞ cP(}̓"$@j "`VWv^s4!9neq@29I~(/2a^s]"_=HNG406ER =wN4D>xP2Ʌ;).G4!`7,? _nA ) u1 'c5pȉ ߑQ0HR 7&tWJP3'DnO];] 4Unm\:HNxըE3py]s..S|dA\2^,8E3(*`_k8@>jR}GMPFعF@ @vg59S?Z}Cɶ&r_Ĕy.9et3͉| bjax-hG$~D.FD{>s*Zy/G5A(4}s~yÂSͲ}%9 s1;^ivj01= '5v+CqQzq#aRbY* y2AĤhb`y {񶹣w"Z)*4RwzAOQApW,A61U6!%DRp`c$#>2R Uj%vh A[gޖ[\=|r@>p+/LLBh:~ƫPdB @mK`¡)ѭչb >"@nh ;#aX(IAf44#=O4'`ݪ}D1E|G컬&w^ž v}N(}^W^ЦA|!B}޶t|IG[i}%= UkrRэT2 G%x 9:}c:d}{0 u2<bKt"@^140L:m!2Vay) ب@>$dTls7ʹ!w<( X|~`20`ͭn +? vLwkGY~!76ȸK ;)i D2>5Y;~gS'[*Ӳ g UKJ-L%qj^Xtwo ^ endstream endobj 437 0 obj << /Length 1119 /Filter /FlateDecode >> stream xWK6ÇHY4@ dR - RYmd/%9c˲e׻=9$gyIG7?Mo^ݫDqMHyD M'?iӟ_@;Ċ,' Ep{CE0l\!ԯ0I0EyU|&^&d?UsmKXo֑g ,/u(`Y܆c~\8zZUiPUg BPC& Wˆ'$ڕJ }W+2 "ߚXTOܥePpI"LI丒WjXi)ֳq:FѭumS !#+? 1ŽF2_oMeD1H?Pu`9tX\aBtV4R&TYյuK6UsX[ +(]i9qkh8?+YJ?;mi 4DsSI:tzop ?%v q(o;&v[|w0}X9u\HB 0nIEפ://]kcuZ`)%tcmN\2Ufupkmw  0> 2PiF0  k[c I|>g*PS$}=D"sFf$D(DLyo 5 GGhD6*{Kvg-yo{S)r|an3ˬҵSYut'|"+ +O&2Ѥ6(^M%8]sqR2YP`"v0Ȉ>PdCܰ$l0Ex Yپ$%1M'hJٛj<ˈ+B9x|[4M_^m ~<_=/u&9t't8;,2ÇmWE 5# ˠ9ȕzb-SMb&`n[Knz7U endstream endobj 446 0 obj << /Length 1004 /Filter /FlateDecode >> stream xXm4" 6IӤ6iN{rp`;U'ر&\wG'OvCo7_/fW[2Zzқ!K/] #o]x?=_ejl EĴ$0LMwZᳫEea AZ JÇ9/$ F햠$jkJhj-}YZp7J5.K$3@vW]!J34'TG^Z$.F[n9Ab&TycKͤ@b2goy鐨j$k{TCGt'lr]cW9kh;z37;SeQ*,`I_G1KD}lR38|ŝ v`2T Akߏ+ߚaeKʦUqvjlkda0+kz(?WW }HkO%À}kBc-)+Ж r|#***bs۴*&7sWf ?s<$` "ŨU %fT/ `iD1i 29wM\Jȉ,7 =Xz{s TG.j8G!W^]OuvL=Ko7[P1. 
XdvdJ#mw 4_UY=cQBNq1{D7J2<пءar݀ɻAC*jsa$֕oK,JcHҐ p bG`L`'Q?6WeْNe\{lsseCp:J;+y`}q"ֺ{D5E?[mrx tG{Ive{AXgi!ۮ~Pp_5sY endstream endobj 455 0 obj << /Length 1459 /Filter /FlateDecode >> stream xڵX[o6~Rt(5"%&*K(岮;RdN%H|`oaMPo|QR/EV~ΊzA9kQ: 1E1%p +^\Kv)[X`XZ^玕(ba< uS幙_:~{>_)y!>F)Hw0O̢h4<\Lp6|ӰeM%]XiՍhw׬8S%7 xfXI,ʚeG[umgBwf֬-َ;vrZffZ (ݩ&Pg2"Xax?|"^?ctphq>(k e3Uvt H#S5Q(LBGК f `IBp<o/U+"ƳS9 i|)- ec j\}]pvPesNjOF&~Xk2S0F@$Gh/=Jw71kX&di^ܳSijۑ3sT]}Sb\ΤOpMgScH^l}XS[lXy<եoY7<=1[(oe>w!Y ?VX̷C^fg>=o l?{{9l(HFQ0/o}s=AJ D =SLF*Ou>T9U!fIUs5f m|DmB&OQ[M$IPm Y#j]%уX)J0 Jc_(wO>XTeQ9TSɁiFOUi,C%W*.AjK7̪^/h骽vfk[4{DiR&Ȣ~b/֥Lk}sqѸXߎRػ(cڑ+nlEjX:t/9W@_m[S%Π)=`' g ZB"J)ڕi>w_Z˂0殎cҙwn(ly%6X(9RHeI޻sX^q/q vZc GؗO#B;;4^RN\] ox-)5Z9& Ty5ݲ5krHLL8$cDC9)-a.nrW_=!)"('Jy1q C`}44%HIwʨMoP_+HQDlCvC.@5T+-CA@Կ:@ E4쓤qy'ٖO8'x7v]c9$;Js endstream endobj 471 0 obj << /Length 2182 /Filter /FlateDecode >> stream xXn6}WC#^tK6 x;`ve&jv0jGqz~XZ՞aa,"YuTu|ǫ_ 'aI(B~pgRN,¹_;xOx4PƜQj̐rqo ;iM⾻lyNƎ&ѿE!Kbo/v(ةq??['`!OyM}|K N*poJlWu|/M &TpD+~l>6]†>A:V YZ5\Oy#ɪ^B֏1է+nLJ yW~5^&dFnp^l$ڋ,T }B 1<>Ud{c:zjz ˪[|NN7 So96ZjA0A7YЁ@{R8cT"&MS`( u1PR\cqV{;8SAQfL8Qxvgi{eD&ʗ=/8Zq>3`)dH۲Xm]uϷ%`8xD=]<[R%]L.pjA)WTфs p~!(yh%|GK =X˜,֪F1a~= 7-7(h3=gMxj{xڢMrC3"ڳ3̉8I<4錖~_0(&wct!~~G͛=aOHpRW4此v]&"m:h4)X lԘnqό2˗tA'LNҏ(*(U>y3gՄf7r2 $״Bnm ĵS o&!ǦH{\\|mM]m=Q꣰>\6$X_VČ"10mxJ*&?ԏ< oUkݦya+3$7+{+?+0GXN( IDїoh83ոߤ]5UҾR[ce>xмd\e p_v\oK&Nș D`e*(ϋ$ka-w/ {sT"30k⇍uL7dx3>bU%T2o15|O/,{7feJcSP:_Bؘ]é L3b٥0\Rq[*+iW'2  &| a_/ c԰`5X+!&cDC66+6͎N~ _…(կE<; ȱ 44QwP0;8j7}[8w1ŁTr3}&C`//f*4 ͋/ɓR, ?7Fnk ^M vኜr%)ʿE޴p1p9)0شW!9a󄔖PzD&zR;N8ƷQdaҹoWP;R؏r˥/`Ӯ#I)<9u,߼ٍ˖=d#rO]^Mܺ^I#%. tH&(1 a~0~Nh;ِ2ߓ@1+)/%t 2 %Kf8网pו]@v@%TH-2JD eA18G[y2I}T%7J0). ĞOHj@2!ưNRҘ7[{/TK<)<늴GӉFX(X@*ȪZDHU>H$}_ЂK &7zm. R|nX2(~j9ޒ{P԰^9@2lŤSB %Je+|`8og}$! 
D#BgW| endstream endobj 493 0 obj << /Length 1498 /Filter /FlateDecode >> stream xڽXY6~_ab텴9iKRDltvc)ZM{4mgtӗa7LjtwĥP8x-rt~}9[hKg '5ߕ>fPżNt)yx (9GD g~r'Il#K d_yq=85 lE'7`64T3>M%5W)td]!Ɓ;KIV׽~uÐuUUzoC$$>d(7T&Tݫ5>>Artϻ^HAo {Fa۲آ-:z5O i/5^(8eϞwLQȕQd_P5OFx3u1 pU/֟w/n")*y^%K1K(|q}avCq#WWWLn `E+7"ۯ5W곡W =E 7\|X"&6=Wx>:~M w2Π !O>o#@*+ƍj8 Aϱqx݅+3 mP!iBf7tG]/ M1Y!b4GNHNljl q-Yo)yCNtkvX^ endstream endobj 507 0 obj << /Length 1364 /Filter /FlateDecode >> stream xڽWM6W(1DRDC :m6fWߡHɖ#{cΛ8c'擛;&#$<#1JR Kh4_E1/7w<;ڙ02AOgB ][{_&=e(4Z/8Zq'sUJUu]j6Y J5*uH< ,}=DC)ʲi1v z?g) }-,Y/Izx9S|9tVtV% 61r58Oai_B4!?*SNV˺Ҿo/ǻ& ~O!)$j[5 !FyYAXե5.oovյj<nЎ6r0mIvVsy!Fg)PǬWGSYR ͦZYt~L3(%aΑ<`rKn2F!şR$ႛoՎ6$YծZ}ֻ/v;G<}ܔ*5t,Z%n]Va. qq٨@*m]o77ڄnvu6o*xrFNlȡWJqE'/n@dU]h$9n{=L6-UuEx zP;`c~2M<V9\{6Zwº{_FܞsiG # J_#Ux0"äQhf"P |m* ٜ=<=0cH&/=t40)>)k endstream endobj 408 0 obj << /Type /ObjStm /N 100 /First 883 /Length 2001 /Filter /FlateDecode >> stream xZێ}Wq"Bk8 M~P4#X# t S-}dgz`VSEt=\JHK 3Z Tm 4pMA r͡xP[Eˡ?K \5dJ6 {+C2g?)gst5[ A9S [&WJNޡ|8=5`| c[ާtp  ëvD1w@cd@)BԽR1 O0k-&؅4xV :0tm/j<\RöDGS0vUi`sf&Uz=xOм+ /'X%x1r1 _v4~{|y4u>'e0oa4~3n{M9$c}# _‹a6z ᇋɧYl# !I,pE+=1kWН} 2fb)׆`83(5I(D5 IC#aU@с nQÁ]vkiz M0 s,=LrN@% DϼZl(Gq\̧ox֭M" r}ee[,>)uʨ W&'yRugv6b"͓S8g۫wjŝ9$v߾͇yS6RsS4 ݝg/fٗN:mܜi? ^95+K#2M͐$5i!{+[@֎권]Fjo<֫ŀV0Ԭˈw0%KW{Oe}S-˴UY2RqdÑx+ΎGcs {99e܆ H\̏ȹ7ZnwNfsw[p-n)L2`?eÖI$[k HQ aq &?p8?{ ݹ]@'q,!?n[vE=ݨFMݓ!rc)+&OY`"=}W&O2tV??"_O@_ҽ.Z~傊Tz*$'멬-G~OBϢ,Mϡش~OJ$꧌RO\8?Q7N endstream endobj 524 0 obj << /Length 1436 /Filter /FlateDecode >> stream xڵWێ6}W(Px!{EA"m RZmrwx%[v^Lr83g8gMfw篒,Q$xXcFI&! 
V|ɻ=_`y J3ḛa猀rTLC7=V0:< fiDF!ep?{;ZO0J"8AQLά>>x?/|MιWV^^wS7$zsC0(҆L&#Dc%11LRBGhE 91#XI,HHBsù#T[vŮeksdJ>O6"}kǒ][vJIOҹWtܵy cY) V>j,^9GnSײZWI 2rؚM+`& ز 9φ-tiiY Dm(;'Xmh sIUuLtUӮtr)G I!uGd蘅ۻ Ü^Mpu@:0Lwۉ0~ǔyՄ"p"K5n rt١(}1lƭ[_N18f40I&1qe1?i|PGjs"Z͏d_, ֲN[!6v.~tTf}eXۑ١KhSf=(SG_i[qE ֢x;v/Q$-Hx =?*VkUA@tAK֘JL4 }P`SL}K$ՇBPquWv)5]εkW< ]8DФfٿk\%Gp mnmt;l4b dkXy-W]hrۏJTdnm'']HW):E 3Uw̰Z %4ɈkLt*~W΋47R)ajZe{ߍ,S<%sFp3HMr EM53iR)͸Bq4P R{l ~+ljlb&эaV-gg5G6Y؋n+waP=M^!; endstream endobj 542 0 obj << /Length 1451 /Filter /FlateDecode >> stream xWY6~X1O,N 7kݛ̪'piY,_uמ{݀Zw5i]3XOuzKۻZI]$˻۬64gfTwY#Ӻ/vkO&@]$ø0ҭڵBt ׋j->Soվn3.iХ4}|5MEާEyKMǐN[zaU7-\_+}]^Wg퉐(X(N?*H8zd (;MQ8ł)\M-J\0,J1ORύ I0ZUaH%8èqu\8u`!ԻpvM{yyWBxR)H9ϒkؔ!6A2] }{̔-PSL5~/iW9t-.:q{\ۙnOq.S}BB{js}H%].E~}#bX,lG] j ?y4vv[dfI\ TpHd!q?.ř m LO5 a5~$W&m =F gΦ^õ }0F;E_Vi𼮹Kn/]|Ҽj7q|Y}\ SbdG>W*{`T;#Z:>c#H_恡`sGpT fZErI9б]/ÞD-}7߯"|bT>jdm' ?B- 0W`Lve(l>ACPQW7&kɌX6h ʼX $E98)3YӴg 0? ZrNxȋfi]u:4CWa_䌅FHc u0V洅> stream xڥWYoF~ׯ s07F 1R\IRòك4)€9K=ߌ~brzVBzbizaj-fG%x=]vz  NGLQ]a/+fwqyzb+˞YJdn3g?Y ??9'zOyU񢼁pH@M0 mL{ƌweSZG^k"2Mv[E{QITEZ"yzó<r}+])͇#&^&p4/:^4gʗ57Qzl "zc20HS;QM\w@z栀,"tdH\ lN"@a^?oGOy/M(6>SQD=_\~@Etȍxi)500yUL_.> ‡rno/nbP~f{go:th'Rr^(mWk@|7Wk]?KÁxB!áBOiy/+{;HU]Ned <Ĥ˱we6aeܧ4սj< ml貶$X\4̛iqHXƤ\,zV9wg#U3@(`Sa4?3ފY$4fzWu8IxڻF׼g HZ?{:Š_=dB`a`bV忌ס~0C#P͔.>_]5߷|sF:r^ȶ2Du#Ǡ0{pg]85 v6a endstream endobj 574 0 obj << /Length 1742 /Filter /FlateDecode >> stream xXKF 0z2=HiɦHx-D\IG%^ȩA!?r8招_rٳ^XKq^ΙrEs)J.K?H*c8d֟qlPﳗ.,^)70W2@&`Ol |  nEԮKx%lcy ]r%"\ҧuhk6wKU:%ֺ< ow(mu\I;_S;bI\.yZzBhjsf(o}AT7f1 )rrVRz1 Dy>ZeaĄ,YE+l ڧiq# D Q@M'or2)Ѝt@ luC,gl7曒f`hvBRxN? JEGd+W+kgc.nyUBvgPxʻYrDhm>o]/]ƽxn!Zbxp[N\y&x2Mܬs )sWz!W9YA_+ \'cSbpSWaW MBfnFXkcsYТ|[ᬞbܟ:h_trO.G"+.DeeNR*1NUD&v[ (2/b>3<~-GM  ^T4I,5[~{,1ը`?zE %gn%^I P(G(!F.&"P=T't"q2!ݯW!B4n>\Xy@..&g㕎S1}Y]򇃡ca؂[R}POu!cP'kG! 
[!\ʐН |gœ}S•#a"UeS{dRqEY:Ye ܮ]Qdn.J;Ww<)"C2X*ZޟJFę*r$ Uh$c-ҹ"G,H+mȸ"?dve0NФM跕PwYAzYG@ICO<@lDwk#`7 2L+@ĨU Q6(N+]`sY8ZoUei"Bp{t:VLpŊ<&BOU?bK7]JLrtCʹ|E9/hx~Q=/=}sٲnR#!;~(v!uu\h>?0eU`G&*ϭIuHtfɦP,<X4"!R1jm_t9L֦B#lr9GG.NA}6HP`Ibn- Kyd,ұKZ5cfޱ3o[$-F&LWcGh\uOè~% ¨ϱԡtPI=͂.)*;ls38Po(~pzh/׃s++[ Ro.GB|VCў endstream endobj 584 0 obj << /Length 1522 /Filter /FlateDecode >> stream xڝWێ6}W.@ĈH&b ! RFmbee %Yv͌rx8vZgϞ+%iEʢK %YK냝oxt׳WaS\J)I5 \+(@YǏPr НӅ=2 \LY$zYf|F7seA-%oQDNh/n[ b HFJm32$NK6SB\TIt2?O$~+Pb+jԋZ1۰e5/RE. Hshc3cG7/GR $& yB%AsrkEȋiM+&Ϳ*Q;^נ?\eީ%?$I}I4,Z,׫GղRGƈar3x_hW^9g,Jzs\UwSvlOpU=+矼xrj#mjVdܚZ/M}L fRrY|l(?g2okj(4yS<r_/h8yɦ®䖏C.d݆3LI G>GGFsue|':QPV~'yˇ}2'̙@٪wC^*XH1&댂=ע]GQjeهOC$A+ѭPT|rnkRƣjzM_ПhzO 4=&I^kVJC975G Y b娛x>d$ ~$dZ8ƅ2(7UV]-0c̡ Rx)2eE--^`@ddϐq|Ĕ2[ܸ?XjOꕺ$HL>c&׹PKNB@jvŲg„^{Xц+hH{~WC Xx|߷YH8^YոlE5z܋-xYXrhݶ@ L%lR{'"}m7_ u±b0 F4 GLW*z΢iLt2,ǖX[m;h11s,4LxP qBnxKwFruLܽ1Wp8w4ni_]iv}}f*1=&p$Կ77Z]|%NYc5%\ ޼iwaϞV^K^HFQsuX+'DC"sr)z|2E2vQ0 J,͊/3$F?JWV(.=T{Xڛ뻮v~:sp^ۣaZGHV'-܃#Z jpcvDZ{j[il{۹H:=h}d<Ʀ2VƏæ\:$Fyu endstream endobj 592 0 obj << /Length 1869 /Filter /FlateDecode >> stream xڽXo6_at(1KREC[imZ!C(Y]tw݇g73>ǫodDI5Z,,VE]-gυ:oqJ3 񤦶ݮm'+I_8`!ZBL&Q:u$1LjL*<p-wGr zԕ g*8ۅ ŅM`16xsdz `{5U4BJɨi߸q%VjFX0dOgzEBBA 7IIbjlMT@L2WSb;3ґ[SjNat[++[cɰ<'EttwI.kF$r)S#/^\{SPexQA4? &b%b~u.imېg`>`!cVKL 5D FA;Yf+.V4ЩT,};O|%f n#/ 4MkP04SXOk  Ƚ$+kwW?r$ƹXF<\S'~`=ӡ1G4% 62/T 3)PV#~OϹw8fRtjL<!U 9МF]4Yt? }E )[AWܷ G&[!4mehRhbL8qpx|p& M.[W?{n9:ռ"Z *^.m@9ᖞuFjk04CeGrhfqd8fD1AB\/2ʖ9zkLu91pwХnT2aߣAԎ@;dΙpR<"g" n+j߃@k;<$? &H&E r8T&/׏x_`YӐ{H읐6nR /$xعQzl#; ew}{9ɢ0yPLy ڬ-aGZoM)@(`|5 |z{7u9 9ӳ -ď9nM7L紘kS~bq1|gHEz0U6.ScuCz=DK/2nuc/n;potԶ L4[eg(2VfD'nK0+aKn+{+M+dZIu``_[փ( r5c Y K ߓQ4DL+$S> stream xX[o6~ϯ<kuȰfPa-61Yr%v}yжb}"e#q/~^\E4"g:w?8rA{4w޽P7K\|?.L{8a0"}Ndˎ8 s8tb X]#ќ|4◰ )Ʀ&>fj5cn_l à ;qXQ } ʟBEb7R))hq#lJe4/bjevŹx(_r? #8ۇ=8TJ6(nE(IAMC{Ԑ(^ x–^?$fr0UP[2 ]Ox%vx.HFhd.S1p *rZm H,9qXĩEU w+¢_.zd%Tic׸̵OAgUJBVjxMB懱#Ah'|x X8o%˹`5! 
(vφƩZc)qB'ȖfՄNN`HyUh Rp01wƥ"T2טU땔);*uE7n3CrƱ`[ô8v2ʣS3k=ԯQM`CM -Pmn̆lOG ]淮T,"yLDogH?p=]H_}}F*PS~xǯݷ@Td`* ɭc {j) 9l4 6fto1Y,O^j5A?J(Ḉ)f/[ixWw76h(7&\\+|A\<%ZɊ z` >=YyѕRy`[=}adJ0%& $WvUY.UG?GUyTPs'}@G(+j=%ꡬݼ-3^r.~cNwuS|o-H|%2&Y]A{VxuB1 e30p=FvY8G?E|4?> stream xڽX[o6~2DR+I!h&K[dDd"Y]!Cܾs-=콘p=yt/AI1F^bH]g޻>\";$>x!d*]P R->'GoD{r-au \@G[,\i_@O'E[\ە(FF L'@,ZȑCqˑ[|xh8f$U.ynXl ~jkpv4XGG q- bQyBgo®T n,X.SQ+;*\U٘݅bĦ wh1*@F)ųϼT|~V 'r f`3P$GL8PUQ ˃mR|2.4@IFcPV#plCR 8ւҏ-bFQ@ +^C.F5%ɶLuFkZ|RŚKr#\4c\*soc=0 Ba_Ȃ7=csm\o8)x'FvmC u9NU硓Jɮ Dݗ5jI*^ځ}!+Ѵ<82silऱ.<9| =  t=y{[#/v>ԙ=SDK;[DAfËe <`=A=snU9_RnBEOX8MSl.;cZ Z/Yg>jV~HBӑJUUӖv7'AohrL=(жXˣe&&yPmpv8:CJ , lC s: Զs?.ۓIa&~] 嬐ՁuDF7; i犆jk)xڄHcMdܬ&aYJq뾗g#3!aa6Ҁʳc؟D'FDфk |P֊t3p 7|s O endstream endobj 618 0 obj << /Length 1491 /Filter /FlateDecode >> stream xڕXYo8~P;ؗ,Y`vQ$}j ,#1Q]Ց3<ɖ%Ms| Ϲs<_7wQ$Xly"'<̹ɝ/nVЋ]D<(Хh"+h٩ްl ](P.( ?֛ϝNVwY`}V4쑢ГRvxnrJflGRQ 1W)gz<</' AߒެLz4'4$L/ďA͢c马fby't}FSűZd^d>~ZP L} it[ɛTCFgIeWv JroE |47{誷k>[RN@ UbޮFRV]/xcv046iF0F|t5{,ӕWڱKR\%9(令zvΪ4Aoa蚍ќggX!Im*C)sV(#E$kO梅e{ EDÏ;~'^P|Ieڷ!d|hcrJbvAƲ3w7O>çEGPm[^ 0ݏU=^T8.@7.wBmF5PO鴸~e)"v-Zkx)ka]Gb@|λx~,sf*I5YZ Jrc,:=cx4V&WdE5Ȼ ʙ3H;+Ih83kIx(" _:~ (ّ$0I܇vRp{S^lx(Ĭg2m1UC'#Mܞ_wFTѩ`4r! DRI`SPy Vʥ2T+q@%0g*Z$4d"7lw;m^CMb|-Rx%~ -6cաj>0 k(TU%$&/>PXO8z1Ut} Y endstream endobj 535 0 obj << /Type /ObjStm /N 100 /First 874 /Length 1756 /Filter /FlateDecode >> stream xYMo71p9$gHF|m:>(QH$'ȶT9Kr̛}0ΰw>Lb"DN v]1+FL c@&%ӛL&.mn4{S.IeC>j@qd` VQ#b(؋FI 7I^f`0X0O784@>fkCW=hsXw#%gM!4,&UsWY0jMI:TS JOlB$hS0+t|&d8y t9x 6D_cTUBJl:,)C,&j2'4DG0)*11}U T-TąՒ/=TcEmJA3~VR FhMtfv0%u_(( ,8^ $BEՎE:"#}a89u$rUص'=6M'"UC/EB#HDC tDЀ5 :}{3_]ӾMtPM[n{F?[w2kiaa9Js,r +ޡy<{&Ak7}}1tK}yD}Ge/QAQlDKjiyrx:&S{x~f_<5dY|tVuO,3M[XVaκe/J.+s=?5Yx",oÒs`l ZJ1تV`%E[\%8 LԁYI" V) L9U7lմ *ɭ:j: ,޾o:>(Ձiv.{unD@nA%!%lJVpF|%<d8fQI2Y? Gd&爺HPEX5Gq*xz򡻑pgu IcVu`TfVs:06(۝g{HQ.[-}&C$cf*z, j Zr?Iw(Ҧunr*{CztfX؁AЋ/ư=HMN{Kݐ l+z!F%v`yca<LcSɺ QћևUWc_CxK}z_)7K3e\ W! 
<,V?zt^T !-\ &kfgY4[MI1uM̓ym^/zծ"M3qYdp޺Bh>E ْ$MH?w#W"!{Dޡ.tSt@^%VI޼t6w ]O-]~aB_vdJCa8tSʐOju`{|HB_5_-j{FpT+y`M0J;VBVJs ??7U endstream endobj 631 0 obj << /Length 1174 /Filter /FlateDecode >> stream xڥW[o6~:$fl):쥍hJJRiw(R$+^=$$sΝλoM9 v;$v( xyM((@a&dyeK?rC| lDeD4 q_8zvdȪ pwEhciZ鹽x,YuU_mUqdΫ~m8mOz#飽-j-TtS94lq$/±E[Z](瑜$FrYv96*{Dք=s;x^|]]O#/L ޼Z|-܁Hf󭣬EݥKv:긁N(dd>ʫ$(KKm,m] ;زZ@k' MQSQD0F:%?E(emf9G^ i>M>`͌3ɇٙ3Tٻ:{iGpG\Tl 5PQtqi٦bƆ nJu=z$2l Em3S֏ ʒ3z Vd}jAv'鞽|]zL8_~[Kh Jb(ӫCR?*BL}37U[ѩ2 mӔU>Y 1h偷Uz[O}Uid^f 9yelK~L+ڨ!.T7nmrBVr]`됟"0Eqxj6S`8!?3z?]U endstream endobj 649 0 obj << /Length 1357 /Filter /FlateDecode >> stream xڭXmo6_!(f3CR^@&Ea"[JD)w|,ɶ&'AGϳə:|;C0F;s3;?I|B};9cQc B8ORDFjS+f(rd6Dn*mE!(}.|")&+ 3-nK<;'17+~(p1hپL|UF ]Q:GL ׅ0mэynRL@ELRwlc>*e2(:zL/Xx am-Pإ!pF-$[3R{F̪ $[,.t~ ۆ"]j6L$UNF 3Ҩ'b3x(mPR8&Ei{hY(Qٓ7DtHo2yV&zcB':Z4[}sdF"ݭjgRIY#U]dhDI/l'KΏL[tШue:qJFQ ı}QkݏMNtūU#?Su5Er7Zki9N"Ԡ~1EfFW<)Zr.S0iJҥ9F7#&-gV\hI#X7z )kghtlhk}U軷R?6B:D ې.%~#=3򅾿+ 驸P!9(Me-L뇾Zu>sp,_&l:rb%b:0[;yًJ} }(зחK2?VC`2 py*58MC/l[I:){O(4\~wPy/1W  endstream endobj 662 0 obj << /Length 1503 /Filter /FlateDecode >> stream xڝXK6W@bHEiim%K{R\y,ys曑oޏ֋8r',[ K!֥gla](4'iCS4% ]U\+ǶbD{~8Vp^{Es3oESd*U+|vrğAh1!7h)^~TjWᇗHK8L%O$[-)_=^]DEŷ/pSMy*vc)S)W\Ħ\-슰CHDTѫh,9:HUb/9] '%O/y'+v`P21jZc$CΉ)/>EhP_9$;llnq^`d4^qXx%$3dH^DŁ{ja D<q4 lHQ<rWěҫZ؇W0fQ0šqk6GC3s`̀mSr[w?PɦK!z/[p!m_F~^#'X8) ܔclF6*CPwxrv02B6%A5B$UD{Ē'}BiFH$ֺicIOHlE듬*\m,i'FqLbʩAvDB = BSfC= 2"M}Q#K,ZOoWA-v/]G\Jq Q7m'ܞ;lNJvS׿.׼,޾VzxSj/Ǐq sȡFF )++fEcBx; BxL(6 b88Ylyd~bL$2G~` +Kd (qE~"MUMk<Z78Ó"|yIdX8i4yiB8uf䱈AznN34$'YY4Jɸ+r9  7+v!ID#4f`zz^wmk||ߔ, f~_ \c>XFnSͥMJpH髲7qɞ/}ϸa!#IC]>cAʠۉZ(YxEOenuPqy7K_pkQ!RX1\muw)a:aJ!d]¥0T: 6 x=(4VŧKifY5_a)e6tKܪKjdna^ :MscFf>b]1 \IvVNH0k(\*+&u3fgS `y]͕H橝o>?QUTz )d/$CJ9+]rSI_ٱ#] សW>ȬQ endstream endobj 674 0 obj << /Length 1649 /Filter /FlateDecode >> stream xXo6BPL"D=@%Æt_"-G!YR('atk?"Էn7o>F̥mb}e[4ϷnΚYXz sA8:z($]tenv\mUͫoqENB<^/ w׶Zz.H},j_u0KeaIva! Y) XVmKvT?VZQl"%ev\s)7KǍ",4AY5/#U /#ha#rL 5/dH;. 
tzf'.똧 K]v'vks0R+DžQSv wW)tu` B/Rtޜш.Τ dFH0̣B8x "hlCLĈ}5qϽCu^%R%Ŷq"Z)p+.KĉɫUn"C, 'w9Kv- CQڛN4"KϦr%遧]N=UCowiTPQTenl0~'όuLΧPS@4yd {/B!9&d`u]P${,eX{ .cH!=ǒAr핍E72kug4Z@Zߑ3ONǕHV$q@6q(QnxU*` T*hR:(2%(N:Wz@CzY`e:H1LPc94Kh@ ^O}[}Sۛ+}Y9R*޸jy+t!qr׵2Lڟ ~.m*!Cfae#BrOj'q<_}p*l87@7cJ Jr?"ΝK&n3-C3Kaccc W| e.^Xbh?J#°Ѐԏԗ?w4U;|<h \&]<"tDiqUK}4|Ӑ]& ~=5ięx1a\Z!b{IVb&aIt"H}~kI=A{q3b= tCu_p4aN";ME ^uqqpھ/آ>2EwI'&YœQwJ̓ a&Dgtd/jo^$R  o:e ޔbqAh?aK.pllWn,[^77駻߭Ẅ́n endstream endobj 687 0 obj << /Length 1770 /Filter /FlateDecode >> stream xڝXYo6~ϯ0R,b-Ev-V+K8~"Qpfad={}ID"ՄQJן>%'?>aG #S(e^g]Q>o\Y7wyzy&zp'Qf9sejj>{zq $a9s5'P`o q6M*=OTb1:uj>UFg9.gֈLtFnȚCy zyRO">HS wsi>jw]XJ|(<2uu|^O` Q-(9fk D <B*\w!8N^AHo t*Vy x@eUR 9$. G%E1 jR.:ӌySZڈJ N+AvR*e(aidE%tN;Q26[~H8)=||CVh%"wGᗝHȜ>U tc& #$yVؚz0o~yw? ?*2c_ԑJ=^#q ꠇ /OywZq._i"R2u6„z7FU礬T _i/wjK\^HBKⲦ@NxέK }@,p%}8R4h+@VC>>h!BWri"( r0BS0#]<$^`sAZi8I4"@oKWN1cDLQuؙ(Z |, :P̹]>+ 8~/ *v_-fډ`!CbmD9 HoǪ ŒXtsUd0RYhg2m%$Ndt}Dt 5)D$3HA'TނRf!FݘI,*jbDlkJ̨w'9NvP,e5DYWS˝\&oyaF?qBuxj|cAbY|c /=YJA0nݐC೰:HlmP]4HM"jDR]oMhjK{74HƲ>UmGFfF*U%_.u!ע9QiձsPkJx|Ž; (cSQg#]*H%9 qDJ6P;.EB,r5#V[7ɕ6)0_2Ftoc{_Akg;;Cp[NR!m|'ds(vx{4CSC~1aLh[J$=KME\iL*Tg( a.j&_ޤu}R/Юz/jsWh;➍ʶE" ׺d/&r[W ++hp6+W; endstream endobj 698 0 obj << /Length 2155 /Filter /FlateDecode >> stream xڥYm6_!(`j$J p;m~Ih[=Y%!g([82 {{{/Y~'/e :fY )V7Ⱥju?t8Y?/]]":EϏ*l?T3]jTUpT{YOs85u}/([`5j' .#_'*ҪMiMR%v1FTVΣR_Ƕ튧=% 0)Cruյ7}A'yR)#F!#[C$L^ BbbݛBp%#77%rݩLM",(σoo!PSFyYWQl|))v1IẗI 2|Wi r TѢo&ǀqXĿ@ 7 BZ$Vl~1cEמVERܫB8n^Ox ^a)4zPC Hj2CHSfS&by F1O7r/)i\ qeQ9(HA-ʡ&d&9ؽ] bIF>4MTATC $0d2RATcq`DMTmVQ ![tG+*\_z #ZZ?:Ȫiϴ qd[,2ojYЋaנ'k):\;`ݩ'۹yq-=j ]mW殦`XvP̎uز c}w@Z«JA_9>ؾB,BmD₽)lݔ,]P_o:Ɛ=h`#Xu N k2Ět?I]7~:,r1QLn0rC1g-pe(3Oo.96q1* %q4ѧt:zNMv0}1{$itw'j5Z~gE*2d,\Đu "(ixL@%OaE "0:, ]F $ m+F.1RdX\#j6eu2;g|dL)0 ߝlD\*ܬa",x} (v+A}Ԫj눲y,zfoeqg> stream xڵXKs6WΡ֌ i{p3^((>>lwAIQ*%'sE}|߂ZYc|r}ǥ%TXE0F6#06 %/?Zi AO/c/Ժ 6Au{ì1znxygÖ uqpf:_<4L{,}Bh]lPzI 6:b}LDĕ"5>vAMBQ/րH!s 4*I@P;KX<ٕ\<ZӨ묄晤G#Qy;"6m]Wπr @FޤK++j PY q%%FXSI AHu&rUHUot:fh\S/Åf^L.*T cK&W 
Y;J_=A_}(]NJ@6mST ?LAo4?DkY&JfAm '\)V`bw tHȊ:tV:NcT=Gvi2v"fH =BY=t>m:S|+H 5~f 3+#IyO5 2}z$̌Q\U;@z&q铍,-Y8J T7{'W*J/6R$+h!ZӠiϺjYMjr gDaFaeEùYo`X]45~|5A) M,èq bs e3$6W{Jy &x|SŐNg6ڸieRQ5,/ 9_o4rSTūb?|r_uw18{"N1ދ p9{U޿>gOG_ ]~[ytED5v7,(ʬ`8$eŞ׊z+ڭnǔF-3%>Y؈2y{d]IޯMm> stream xڭWێ6}W hĐ" 6IѾ>i˖lCEu8s{;{?~^={d^$LG0FQxiBGVN_?Tm xXQi L~+oX{:/^15Ł $('ؘhMGwR Zg]kqEYB{dp2&lS@@gr8ܚx>X2VkeCM]Lq{f`5QC`GfW7fm,4ȸfJG"1 N3_lkC Vqam%2dmI'/k;s'm=L[U{՛?+ZG&)fJv(ϼx1uA޻oH&A^0 IbQ#0i8il Qf~MՂ{!=ފuL1Uu[[g< 7>H`%fk. Qݪk/xd%97 S*%ծ奙Ό5DR7(;o$gZO]PI>k^q4a1*eFk-N4f"uHCUgkBݚLcT J_ɈfFIYm-JhJ,H݄ԯ3'DbSXit!^RP~HmTUJjTUX&ٙ Q^p, =0QM!Ak8_KcD蔎{ʶ3&JQ-Nr.bUmy ʼn.0jIC/(AINJȫ=/>A0Uk,Dɭf뤶K^j8(ՙ[C_K(Erv5}]Vɳ> stream xYKs7 W\(@2L3Lɡ"+GHr[?1c!Y. A\pJE"H)9M3$OvÐ$ )xf'GӈwS?*D)!C>'2bJ0F.d #A8, l0QCXAQ0$a`:;f(6 ibJGGQHbP@ @0H.)J֑*Vd3uś*Dh3R ɰ1ΘIfB,}%)NH``0LGٶKŶ-صfP**Waz-W`@Ƀ9p#lQ1AtvUUq,aHk p@N5paCLb18Ilɱ6(a1@h3f &WBd[ #48G3/d0 so|+D;G}$[Rf|31K`GCF&6%?I=~<~:sWb+(l8[Ng.fGǓgoBUx G_$-!6u:/ j̸w'O}4~}~ݽz<4?[,fnnؽNӵ;1{ĽJ񄳈ic(=uk7if/܃s?]~g'~xH w]9p7'o.2?;9y Cp1L[04Sa0 %O[c,-^|%g/-q!_)Ư}[tě!Vp1\Y#g?ꐡBT=W#ƭ:y<^ͮFn'$;y]|>[OCXG C UtX!&ԭ/2wUGV=>.g] ;'Cꃣ!#kdlU \X'ፂ t:2Ԭ.*ߴ3`sŅ/H_0NBB ۈ\ck\re [ĢaGb2kp89oơ837QqP$6yk3څ60eoqF{(/+oJc߆z3pmwdfrַE$BEQ?8'Ori/rfWw&&yS?3XjFU¢jO]`)KuvM%JEvL Zd po\46T v{\Fk& zA-xAz j߉׾}'^ePr`Bd$?|h@/JxpF4A)+ iNv=J~;|__٭f7?/X/3w#r,"d= endstream endobj 745 0 obj << /Length 1366 /Filter /FlateDecode >> stream xڽWmo6_!( 5MR% ]Mv(蘶%&ٯ_dK1R <3  ^~YWi\M(%q"LP?{! tM_(C$$](NN6zvu48v4T=g˦oQ^wKeUYiۮWkl(v4 I4Y.wv;kW~WpA մbfHYvV})6)(]IBr;(d.Q ^˭:Yӗ~yGs Z5~ԃ|XZ2aH1R: N8EbLvy'J3A gĆ* PB1HtJ4n8=pٴ);^hR>Qh@jR[`tM=ju`^N_|c/3{݂%k'ZQ+΁2rr,IPOHfJXy}S~+;EIHӄB1Dſcj.a6=J<-tE[m6][[ei'ֲ~I H7']HVuY\\źHTb~<9qnγ99CKZXU$b ̵BbuyjF1._ts'xcXIM:jSG endstream endobj 757 0 obj << /Length 1394 /Filter /FlateDecode >> stream xڥW{o6?hQ@bD=mmÀ]14CGKU=%[v.@#ugA1 HcwG)}^0FUk稍AjĹeojN6٨:1. 3"zeJe7M@5NfTdf'3G>'MD>o(vU7'D! 
U /J!@%픙@R62w{7vB*SV0O&T0pҫPѭ]۳BSRGP˜H)w^"F]V\PP,ı'\3t)4#T-"XH,eleWh}rQuWo:e$UWhD}?'̳J;$|ABB7ne|Cw\n5[w\fNE,St">FI܃F{ ^Z5햫̫xo=O:} k*N'0ڲ+`L,4cfm,1ne҂7hTB!̱ig@d2zHIX2,b6Mc[sIboZV^fV1(S8MݡZF,t>1jP{R(" *ĸg+<1o_)'ԬeWNC\t﷏\{ী]4 ޷R*Y|(萩 717 fi)[sjQUVH1r 'l?,ktd'9o~ k](ħ找Ks_hb+e#,Ym(~k6E:Wo,]yr1)l^‹Fsg{@P88m=2TƶW:dP@ӏ&3z_sɱ8(-A$N.jQ 3yu'/,U|׍3<2奎 %.rODOO@!adA?nFHHutmЮx=YF?7㠹^be*oNT^ՔGq͛"Wm 5\`'`i+ʉJ׏7B|5B_[zҝo ,F4&O]6(F68mʚsUf$`<> stream xY[6~?Bۗ@PIIۇv(n{-Ѷ6xE9';f9v,E 3\?<=|E3/CBQL0hyOƏ/_t3N(J`=ű:wljzzCS#鈠8M~x{كWY=뭵G%$yrWI40BYI/M=$>˶귧cs0#"l22yB&F)nȪpYFQp 0FcsB]y2qH)bAtˆI Z9?Ku!iie8 c B8Xо":&NM?9BJV^h)AV1-~=r  |z38(F (vJ9/nD JlCiU{-ܻBe{QQ } of`ͨEZM ,[^ l m(e'Rjb|Q.ҁf6^ˣ˷G` ^0Xd ?c[ G)b:jSHXڛt+3J LXU`5#֌jMhRpaLJ[Ý0 -x`$åNBQS]s!& FqqFs)#XCpW^ @¤Xͷ*.)b8<ك1`B w5D!jȿ&l؄$>XW3\Q5d|H}P𞣞o+Ȗf:4{ KX;6f'e"J p/dA_$E|tO)PsH$&!zDuZDuXO9Fek!>I4a ! 7rE9uRE~hTMg^ڒM. Q*b!bXT )cߊY&VVe!K`_j"Ak͵Q1_̓$k"ExC)٬F׳[8m Qc:Č2aI 4}\,y޵ҾOU_楲봈j <˻*V+s 5%,p4m뮃 æwL#䢟g WülgjCKrM[\Cp8#S. ~rfqgKB0x)Bjjv݇IkD}|\3 ,xjFP2}t w<o[KXI>؝Z$WQJcFcc{M("`_2K?0-#A¤w?H[}ݚfI w~g\Uezų{sśً_oWg?̦qyZD sw"Hj?4 endstream endobj 792 0 obj << /Length 1167 /Filter /FlateDecode >> stream xXI6ϯ1G$E-A:@ٗ-Zɢmڠ%>Q87)[lYD>֢K[N_}ݣ&KI0Mȩ"qGϝЮxD֊yZl'p(%ͽk{\,Ir.q3)Q wk"U#wYU:+.7U۽*i;>lͽ3FSgj9&f {/kKP_=00s\Ӄ^\;:?sIxq3* q"e~>.UWOudjH*\7UQDnosOlHwlF(qCAOտskUuS%q!M6kBv-$R&K,kϒd; ˘e:ojU|nT} +\9ۉm_qR`pw2.JMW&+XQ6 P]/rv2No°%nam-70E?Ac>*S2Wy(& @)h1\Lp'^HvהlgP_ #ug7bPKPQJ i A 14`:NFFZoIR焮p]mR450`ďy$9x&rØk3BN/61h(ȕCաů4T%SŸF"6+Gq5-]EHP#9e TEْoҧX6&KoO?- P*C)b(|ɯ/+Kq|1ʣ̔!RrBC~N$`O94F*B~frA<>K䋐ȱ%\ZʼE!a=7h|е[NČ"U /ݡ6]u.ƥ";amjVP!@|9Nnb N> stream xXY6~_""kDA[t}K-mqBt:CjE|xhf|3c:[kRF,|JIF8lv;xu(v(db.woC:bYFlb>RO4뾔UG`_>I9w^3]\.88hRʧ7-jg%v5r0Uvٲ0_7 nQ؟/;\+=P/=f0 ?8vPhP/:]+{q2A$"JDBǵܷH}Rƞl\jy]m6qhFtRoW!7s?:䩤pfm>!Xֽ+{Km Uۉ*VJ~}nɬũ>FYZzC)VKd{/:j"ʻ{#ʴ1? 
PNU%<}N&'lv#WWJ~(7ZsG}g.[dUCPIf6blhe!Ws:0(KBqW_)yfcѾBQwti3M=3u>wb8 0r7jkd6xoecH6M0@@H9u3O@eaCeͦ!rB\{Qp X FuaFCvÅALtwNH*.J'cȇZ*ǼZPqnC=aKfm_ *ND]vzB[-K6K]T1߃x޿@06`c@"C θ+ㄲtZpEX [io&N I0K`^!&.|l{ ^Kãz8˩a _EK8MS 85G&z'nt0) .Rg^zPBa5 I%Ia&M{B wjP}!J}7u$lb]5 \JcX4@nD`NK[1qٚuֵ ,J(.q@B 6c悉8!OȔ`lԇ_7:Da.l{d/:u,q5yM;jͪ.7c6'ߝ]sӊQ; I˭m.hr8&Nj O\^bVX5zPxeL`|?=mGᧄ8?coz.f4@±W0Ryٗ576!—(]n+Mݫpta<3[ZtzIVVǖneq&?Lqf@B5JTI$d]Ppv%PqEZ,ţ-hﷸgs 9]xx_VeyTNwkɏ$) endstream endobj 812 0 obj << /Length 2000 /Filter /FlateDecode >> stream xXێ6}W E%HhHmWjmɕl;á4/+8s n?=wQHYhqYΙ Eq\ܬtt:rJg܋>ap#t1v>JjVyqffgXE-+X5>o.uQ_fO0l\Iaf)u-*3Z1v*0{N: yolKm[ ?jҠM nSgö+@}XӱZLbtufjrtp)]wW0" V~:Qޚ')uih:zqM3R" (z|ʟ;J݆^go?mɰ]?6ہ:h[ h D: ?_1 AHЧLrj"3G=3p 9ȐxoEPQX j;`FR8M+ZtoY٬olM3و{2!<R c5?ǐu)F0Eј)P_h!Oppk7ePwL= ~ƭIc"Zld.)\2'g`gs:tK SijA).@nv *x2#T%rL>$@@cs$,uݩ~.`Z0ˉN!qV!0} kce ̅) :(Nk[g%ɦ>Љd% !J@ ֽP$;6crhSoխW '屙ddeT,TwfNZũc(t Yz.^́A> ذpȑCY}2QO[W;'2cGIBEpvLJ)8CgQ*QU*kiOXؗpKhp.pwXɞ؊ =e_)^`XlZ0OD#3FsVT̙r41l:{j_ĕǝ7 =,:+jh`}eQ.mnn>`t 5f4H(IQla&߷e. ́? 7>*JAS8 a= iCKɃ kuE,[l|һ֠0:#B̽3sPh8+Vq 5 BMI: Luã~0˰ H*w[B=@CFe9U^RYhy޼M|HoFc"B!&(_K'=YF݁^c{M$KZiUJafϥR޿ն b?r.ݽ*9 Dγ>ee KSG_0kwwiW_Ҹ3kVvt.ӹdȩ:Ӌ#uhl}x9v"068*q[)ñ\o*w3^O JwO倩[ڙ,JI'3@ c*]bws:\F/h3A Y\Df |C;qG9ψq1 M c%uX#˱k"JH(~WB?< endstream endobj 817 0 obj << /Length 1252 /Filter /FlateDecode >> stream xڭWmo6_!df5CRHѦhk "$fG؏ߑ<ٖ6xs6uu ^NA$ЙF)ЙL3G_aE. B1b~sYj'^O"uֺ(|J >t#8Z#_ϝO{ !0JB9?Bxvz1q{\G5fBq7vSܘ=h{O$:03Y^&Rɪ6~^#gbv).$ЮGoLf֖yh2]N\۠yR ES%Dgj;DN=w:zԃ -kY/4rۋIݵt .Xqƞ2F fU6""XGQ{}sk^%je42vO>n[vsCI:r&B[7ENZpnQW 8&m|oYU[a](eX/<\tG5; <֠X(ÓI>/ƖBQO-+ EsTe2 lom|n\7[5K>;>T@ŀ1pLtV%K{rg=B<T44n-Yf2ѣ^\޾!]|@-*)ZۣLoyXT2}d¤jyfՉ~Zz_ɥ^d=-|ޱS@|[eC[ o KpZjm2EG O J?鲒Ava˸R SB ^4GuTk>os1S*y uҽ`8~`_ endstream endobj 843 0 obj << /Length 2234 /Filter /FlateDecode >> stream xڭY[sܶ~ׯS;ӴVuœ.Z&\rK98od9h48|r_|{&Jۃ'8gAyqę w{w˲woT:r3S]տ4'ƕ`*Vtu{9 ~Pxf ,.W[>!/8B<`Ql?N708L4Nok}l~ _?Zoc_Wyue[@ Xh%<&vaQJ5xnӘșlWZ@! 
}7>_ yΩw/<284IFwYQ=Mʒ4F*n`mBݗo{RPoRp *eͱI:Gq64nSv4ys2z3a)gq4OoWl-%q:غn>(TmB0ᨫ|2UpzdvQ vB^/TB6l / Z;;7L*7vtEQ̘㊜Cwk= f'Lr+Sdq@OmvWq~Σ`c1\q3\kw7x;7tnt_lo'0/xf8CKgk]tl9<b쥦FXUkhCD<']ەa(\V7_yrh ݹ4Ԯ37~ye P@ݱە B!2_<)mEqiV>"c{v*k=h7"1!Z~{ڟ1 <#5bc`ތg% ( Ϫfeh8lyQsx)/# i9i{b[96j BΡYLK@:$Ѳ&HhmZԗ]q.Δ&S aj4 /k022 1 +ק]AִU~'@` ݊N Y7Yqd_af݈p,VS1Gz_4+ZI&i` OP \Q:wϧ]V8:E$Ofdn "KҮ/Vz=!p2L ]\Na$;8~NC!3ua.B;tu~W6a431`[&K" >.W 䉦 YTč:?.S]b%"|bBd'@e{<16F3jxw&cMq(=&^ qKF2Dʘ9Imx z"&苐C [T.oB 7ҹN)a龡ack4$"K$\'CNs'7FZp~",P } @h%2:AQ<. `eqkZr*c0y(Z=~–&"v|ݺU976GaPg=> PPI|Y8P+L; cX4& ό*aQ $ΐ]3sxM`##C+5Ek(b-Zb=Z3O@7 X8h82XeNvv}wG$6BIac;Yp[Gp r̫ Y5MBIP+P31FE F 'f=Fcl6!7$2UQm=emv%byp@9NIʡJkN DD?`?z:ы[nk'`QcfŢ]Fg-sI>Bh}ɶAes4ŻQ\te^X&tn^~Biww*ǜM嬇G*:,g[[ws Yui׃ FLWs`8V,JݵjȦ̭P?wdȦYOȐwH𥫞7I Df9A{J |*Mt UD> stream xZmo_w%p%p{@{hb+Yr%H}Y|*-2;|8CJI]pJŔ]Jёǜ\ruxeq{q9ٽ\x)8!3KQ\G%\Q &E`S b; .cV\Tpekӈy+(\F . .WŞU xv WbsqFi)qCh#l.J@BrN *p l.!jWX+g{UM K&(RUts#V 9SKM4RUǚL .P!A`AsZ@AMq9F_GۛQby1Ypy>7trvceߺi->'4/d5o]Żk^N?z~i\ k{aϗjk^>u?/WOϰlv6gbc>ڋoEx |#Nbn5rb 6~vbCv OoRXD40o,o'0 ׼y}w/kwvތ/'0L}nZ,'6L6^|q!bzB+}1~3^l[]arSA\w!1 ^'I=~{_.+x`` >|r|5d 9cw!GoQ bƫoBfKu%G:JI3B//Gړ6!T F~Ĩ|=%3Ѿ8v't jwС+ЕsB7][er?b葦4sɎ'`;:@!tafn(ݨXK;ʵ[?iOu4E S YV%rAbe͵?_ d SFmUUd} ]x̚@C/{ jߛͩTbU!.p)tb@v9j㣣v|=]̛O?|ZWlWǿ?\/~l#jza6qGk1:G ɺRolaD4br&C/ւ!k}NE ]zu@,)D}.e/z< UrYiM! h٧3D*hFOpx Yo$Djjp#Bm]s5(kͱ H OA3r$ tY\ZzYgm^ F}rc>ӋhXLmO-4V eѣmwiuqB,m~Tk?k\*+kxR\Kqݧ;o7oo n$QKB#!xF4XeV!;%z䌂Z U>Uq}vX]xjE*]ig1u#wcFA[5 ٖ9ٹZ(QVda&SHq\LQ-T1 Pu*>}bt`Jl7{Qν$!*#\? >P 8"dS-vfP4]!P$og+?@7CCȳ$L|@ $E 2=t@q~i .]`V 7g31IlZ؂I=s~X~+i~p* oNP/Za endstream endobj 862 0 obj << /Length 1795 /Filter /FlateDecode >> stream xXmo6_uh!1KP/Z`n]~Xi[,y4@x,9J')""ywϽ|ٛG??zZdeq3sF,9a0;_x^`e, "8ǬyPJGܞ,ܞE$|רuWShUy[U3:tpHtS2SϷJ+8&<鈝.it8VU+"Ҳo=:s}UQ9ά6nݠ헽VMs`Xm R gG,, VGE6g,A P,i1ÔEǶT薞ٕ`,ݾ+{s~Z6㺈K+?y0d~w#^~ߗZß_) El?U$AhC;RJU4UJ븍z y'!, lz~8_rsoYdU.!_Q( W\e5yw-ʁk/v[TeA Ib,,Ti< KW`ٕXWSi0Aa! 
/D@*OAKeWn-RߦNjo6h Ob!(Jr՚iY}2 ^WiT4Sp>gdVKPi*TW;p}~iZ/$Vf"~vJVMU/Wӏ| 𢣺Sr !-VU%-15rkb$ަIceB >q'x+?a4e Qfm]*&"|g8yt*/iaU)' |? o,}킬$5d7iz`$P4Bɸnl3ژ 1 }eޕ XsL0铢s`Za\*b!lBv0`a6 d Ġ qemQ\k8filȏd=ca \OyX ZXԔ}[ ',IjnAh*#7+'dH0FOY_Ff pmrJnsik besXۗjC*LMXP7՚\!q<̄'%X۴ a{)EdG Vui R-Ubՙh9+ (בALPnpcεK麝\5TT[r\n.K JzEGtJil({E7.J,z|Y˕Ύϩ 6뺫V^f2ll;deh*H6_ >*LZ}ˈ2ӁC6X*ȵfZNֿ?4ѶǸ rͶ,t\Nꢹ2MX}EbK  'Qpfj?R8\^o])"r]t=m e/ln'N[vVs·6j ]˱nFr 9" $M5BrT+%ԁ1 篧ry oVH)G_µ,*TTš}Nxi^Nz7 > m{aCq8ՔVc=bYXo28MfWe endstream endobj 883 0 obj << /Length 1697 /Filter /FlateDecode >> stream xXmo6_!(j1KR^7tX3lIi6mk%Oz~w|%GMaQxi hwGϢ$HI(\R(#Jdȃyp5zC)]9M(L;+e$N@Y(rDF N$D q]WQj79.WeYtUۗuە1a}Hʸ7w+3MF^`zH XiI KR/9VyiV#c*$$/rr;V7RK}:WݨVy@~ *}ck NTk;Kb LzVڻ~nf5m8W"rO`,g&Q,r0v_IGCV E v]xkX5aU7ensXo߶ֽ?O8=8\5jHaCS`SE-l&!(~o}5q x9"1+L-@(}}Q_@vu]])4̟ [?-NwxUm]̷U驪5Zs !x=[ˣ?0}hvi0[]>c$Lֈ ypqkۺgBuz&8A DD(eƼ4il,,5=K:ګtMĸ{׌ r (q[[6MV=â.se 3ly1[mQv$&"n[TXP#Xo`SZ3V:3Դ;ϵjf+Lˊ!Rf7Č>A&$~ʸ?"˻i1!"jV5GdLx"J7ʡa =8 m:xރf$&ɹ z)'? ݘLxr 8Ba!ISX6QJ8-O'kHn5_`@wt]wMً#!GrFO\k=jVJ_-{Abȸ]0ELk@LgHJHXv^ղ#l9RZTh`wn48qLx<:3m׺h{ꀋ UlwPnI[K_46*4|x˄(ᝅpF50!*,xFȭi4SW]snxߖyc8{5|wU-)c!#EgCMcTύ#ۍ-QY^ ՝=}dHSK3qFF23e(<3W}i2\j᫻ibCl;-n.>?d$ yHRYR 7[zX)GHyx*SPE"! 5/u`0JM,Apt@',,C<:Ɩgaq_ Wۼ9@J:©ܓ!ZFWsksNEd2:ʯB}D 1^nfUBϷX,EigELh?M82 a]KmnjkﺙK/hWn#ҳw)*8 C0+ KVo7x .fh5k+!N<BHۚɳYqUoٳ endstream endobj 902 0 obj << /Length 2538 /Filter /FlateDecode >> stream xڵZm۸_a,r yH%@pH 4r~Imkɒw!%K+[v"HDQh8yq_Oo~(EB0}xB,=Ǐa|E>Ar̚䍪~/FY Gz'Q2r "(gl U*'dujZYDH/d"b@锔0׮9N$[p\1JVV?$^T88m4R 'Hq} KnAm2}:deA`.V–++PT7/r'Q'x];eǖH;s~/n{uV}y+'dp)Nv=%3 f9`0xW=Z_¹Mĉ AyH||g/<xΚDyja֠ #qP95YD8s'?Dtp:=f~9g! 7.{%4 g 2c ,Ρj3,sO{d:M9?T_`eνfp)v_ฐ{0,d^ q.G=N2 .$- HԻjPOzNr.3guOxSF+4q޽ǒj2 #QӰpqpC0u C!݂11@lggcWZa8'Umj7Y␻ē@\l V{TЖ<?)K紲I]ϸ NICݙֶ` ,<՛ :-OhUP{%^Qszhƭkd=j. 
= ڍp6a $׆*#st]7eQU+ܪ,Lor{o׵:g-L|8(YEg-ZV)cgϦ 6clBcS.s6f򵭧p~{ʽ ,A(̙X5EW<Ѯ+ly77Xt&@k:tei CK_?M[tjs)2H4sxj׌4D=7~V .m1UHP?tGy2 r>QPUz-|rQXw( [@5X7_+T!?_ endstream endobj 913 0 obj << /Length 1625 /Filter /FlateDecode >> stream xڵXmo6_!d(f +z t[}銌JIMwz@Iw=<^pzq^F>aa%O"F{lZywˋ(~@t9ioT>^-xm|26ǡt u}9|}v)N(YpZV- h[aoU]⨻8xӕN=|-R[g?A+JyOۏ|#v/Eյ|>^uOGSkkS4J¹N'@ '| y>*i0Lir*$;1%zRIH_''0<􆊗BI@I{{:Q; j^M]^]>8 cv;BpoK/ B»ZhB@'a~LIO.}dibZ@NԷ<ԽQ;Qlc1 hwJ6cLGY 2E'K%EC d`Q.M௅1^ڦnE7A5)546H֝$3'}$kE_V6Wx~^oh 0 K5YSMnCٚ~8s ߩm -ʒu;>Vଫ }.'zkh*(bkݷ(Z3Bv;ufS{ $GDgC!L1Mt N:Q0gl@" o02pZSH4(a3(Ip+V) 3]k?_ꜘ ߽v~ΚWPA5['ɣt1 }+wn4bBJu'&cF-NdfVD$^&J5O3 ^Qƶ[FA, ewm!V^V\h5M:TE9\J3# _4 @$Ɩ#>`iFe֖Z9Ee4zAͰn_xg,6Jeyhfvф1`s`9^6=D$"~R0JKԭŨo] NV4S(d%bIC~zd\)>9m.ük"'1*Dfʼ "MѰ4N)HfӸRXIxޮZx0zbn 4Dt3w3E'ۊBL+\!V4m$s4ɂqHĒ^}s_Om`&ְIzؑ61%4N3KiH\>˞"_*us@uu#Z|5p lrV~>(# .[x_ʶ3M-l1BȡqY殅vW㽀fk" %R6CRfh{F  MϽu%KKgkV1Hg.Y^xfuxk_fПOVئO!čxϾ/_z^nfo&/As>>F|' endstream endobj 930 0 obj << /Length 2360 /Filter /FlateDecode >> stream xڝYk۸_a, ČDzibY-ѶP= =rpH{pE_Ù339mϯA$%!7s]& ]&|g;oox2ҏ|&D1/_W5);;g#wsS_kgӗ7;5켄E+4bıO!`3kc;BUvsV@ҧUNy,JTM=<АN]h:awЮo3WcN 5 dgE.yuz>0"2O#:5E8 zxoV ^l!| @(8Ǽ(h&tgmF$ޢE@$b ؁#kr'"xk+[gJ=m3FD|nzfF Qn1%p`a(L5C׼J"R2@#%sQF|,aβﮝUu:mdM.IX)l7w 4I䱒QSኲ2^Kyڵ$x̻$a,`6pscV|>d";XJpAR&30xhi 9[p1`y(Pdp3XQ'ޑHx>@Tc^Xczy2{"X?((JSuAy I/ :`5J!_n Ÿ?lL O^2i0@x r鶲Y/pl¡ "31ދJDzo/)o>nxȮݡxlZ I6OL.6ABӫ:ni[CA]o wmbS adh`s0@e_s-<`0Ԥ@wuap+MHPW\0<`sB!]L4_sUd-&IyU^?CU9)I^:MpӳZsIP}lo _t]IFu}SdxIRA"F|HǾJe )!ORMc4Y| |.2^d_yR-JQtyy)rZe0^K2 [Okp%@85 V7CC҄Ѝu#Пi?hfWYN>eAÍ"4`<4MٙJY~5QUڵӂ7Vg|3FܒI]1ضû ̔!;Eog` U|`7/Z9a_'NUu_e{e17ɋT|iIxЎ-7D$Og :{+=p$w0J[#͉!ѣ-$16U7y]F@4;[71cKoZh3aS󴅐J%6\pE`$Qm_P$ I`u< M`r)fPBxKgC#oJ=Ahh= ˈtCya=$/C4?E]I(Ӧڟ)iˑĔ:gmC~W*ˠ5,emz:j<^ͺF^Ĥ`DRh=}aV:[+`T7 } ˌnK `U}?~WlYBo1ψQ돑`C2bQ+5xz[g)$ROE 7) x|縘 b>gY:_O4' CB7\n=;ւePO9ܔ}QaAc_Xa~B \%͕Iڟ> stream xڭXmo6_a`(`1KK:`5ð a؆h[$ ~Gi:C:S<:[짫l< b5)%a҄8 frŶ{XYCG'iW[Qc[0MHyE _b=b-]D;a~D"?£OLAYO荒sذׯ2ow/{Z\kUz۴<$TR\EjNa{+\܋H<1kI!'o:ѳ*I s?lT%Ďj#@ ֢ ա}a]%јJ}DGO<@Z'Hܞ˜< ?SUnB"Hf 1 {# 
,ˍ%.9m%7+۵^֬#^'ǹKwG#.4-? fė4!DbrY`s.,E74cHĻv K> av ֶ]䀤Bo4L \O. c]iLaw6d NIJsjX C8"iŦ#gkrx霤i?Mi U c{+!Hš4qdB˰ ft-*5~f(X/Q~씣_uAP,Wf[U*撄iL QxC':%;n@m0@գ(YbC_rf4zј=y+UP$0&Y|-n#xE"5]&AC}"0ΙWհ9L`D_' r40$Hݏ)~jNJvg:^($nml c[{K^as2v+ԿcW_HjY͉cj'XJc9Nj]c h6VΛҋJD-C \􌠅VS]FyDK)lbzyʁ7@¾JD~y/`SR@pys;tvs٦Rrj f{qy@yݤ>˰z.%NBL+]?7lct%zP eJԵx {N;Eiʀn' {>HSwcFxH5/5RCAs͍o|'?e endstream endobj 959 0 obj << /Length 1304 /Filter /FlateDecode >> stream xڕWY6~6`k.%QW"4Ţ@SDte_;~_ȊP;X6%yc˜8o?G"~|͖{\fasV|"-]|c-@r]b*K뷯%3`1@ Y M`wnڕ Sk) ~Ev҄7z<iaXfL{7%Yb>$nDŽuvǎؐw]#{gi܈ׂ &5 ,C/ь[fع}[@З6j\ĔWCЌ7NӟByexu8q є6FMC'Oo/-7P7~8D3MxM^gH׬<~cQ< ȴ-g{9oi9b4-[wfj Tl3O/41X(Sߙ ز|J"BZq> [ l?FuTEląyf=nS6d^|b[$pF?Hȁ38(,2C9t$-ˁ򔂷U/ۈ?.BH".E{Ǫ{@Na! aW\R:J6jc$<s*3ʂZyX7o [? W+?@O,m -8:PRiЂ|늲rӂXWm)J9}%>|薍x P3X;/WEB&h<^5,{6DDBT/}MޠK |ޱ=g1QyLfr`4A-XO7sUb=,lOzi# ";-|o.  Hs:C@ԗqD%n*+NTF3"5^6zKkG-۞f4g^mE%ɞ*xɔIqII6"vSӁ H]ٍ gL:ǡ-!,?Ps6,L:Gv#(C0?|TIy{(LBMoͰ KyϴO{ֹ]fAsGO,/7ŎQ֍}kj2?."ȴfYNEVGA!?  AnnН7ChOFQ˷rZVTU妍>bg1/Aӝ&0lx3]*]Dn3B?C endstream endobj 849 0 obj << /Type /ObjStm /N 100 /First 885 /Length 2214 /Filter /FlateDecode >> stream xZMW0 ŏ"k5p c ϴJ4@$^`|^uH֌eW=rjuUٕWI.*vW]v]R-fre'hՀѧ9NjsAL@]Z1BH Ɋ9Xd"ɥ] M*(b*.1$#Kaզ<00hR1po ɾ|" 1ͭG{ NmLPv@=c: ` | \{3ǣ=q/@;G? 
MO,GR{ )rȲh!@J|jɡx fl cI 7wvD {-U8@<)zv3& #;E QqP{M$M#i,Wc*B( mz= fo-,%{1MP{NٿLL9djd1lNȌ( X% k$qMKqo~}o5 P@@k/aFL,3M sw7HCPGXD_KTjWwd}:e{Lb[v]3aDRUlVdȄ(ji1)j V> rMx.̶WK;Ѭ,YhqhDH6v+q/7߼[on7_C?)’ϪO ̈v3HĂ`(K8-DXNl gzhay;X3*rx*R=3TNvصԷܷ2fI)9x焴 }1GWlms^ub-ǁ"T5\~bvf1"jl%XJMjgd .ɧ\wy;b*#۷^"J";3/Kdsv-$?=?&́}:LȎphp6(xmpEpՕڟSkNm/tm bGz(kyDHx<7 E+q0 _,*2 lnGDSᛁwprfT33X0I${p8|SX۠XE4> stream xڭYo6z%)СͰa)-ŦIr찿}|-*cCQ"B]B~zs- 2&Rf2Qf.v]{1G^@҂[qSXI)_Mw [Y.{' efAcWLJwpy B9{ήɎD%tLq?LIytlī|цpa y>e:6c{tk) Ĺ8098w(kG$SeHH3@ȷfgĦ[݅pǻ^;[1]k["{{pWG1|8l*tڬBTDr2`I`I0Ŕhm}\O1 C~v>K}P ><1j_T+hxKŢ;֑j%D)'0 |!dO~L*F [ \y"=k4 )"E:6kS~ ~wZRO6,$PVu #yƷ>bzJfd11zױCeRO=>GzQ5ch[jTvc @Zw{7Ӗ*x>`uFېqO5.%hWr0 Ø=?襰r2v۵wlΊۮk@#"= H#/ <G*ޘp b[`s#$O]2@޺սAZ-ί5p'TZf4G+PMI4ṸW2y@c *QYxnϜhfrߵ7ԖgSD.') ׌cZ'P wiA}bԓs8іx]Wl>RahT`o 1Z|9d0dz!=1=^>j,v(6c3WIC|\o11ߊ<1ePyy$^E"Q 6;K%5跠P<'^X/=Qgt)ARziQ^L 4YEϿьeS}p0zlN_+,PS9KO[nz fǜ0(!\Ci5i#|SS sYvg i@u{sːiHH$L 3tӧ#+4iKIFWacmXs@O9DM/]" endstream endobj 991 0 obj << /Length 1088 /Filter /FlateDecode >> stream xW[o6~ϯ!)QͰ.1-Q 1&J#EBW]ёtx.Ρ[:]|&ǾM!p= |moWW~8t}+t:uVosZ) hH5~^*KZzx=E#@Fՙy19pɊxRLx(H'iy 08լiB֘%՗KMwEp}aFK1L;C^(1L tƤɊ(Қ2)&yO" >;Zps(>fc/,,p\ib4=0=#\~/Qi$U(q!-k 3+L){tiL#z}^Tm3 K梑"J[\ypye+,Yi()׮oL#0К ?ȴI4"nxY*@ʨ~yhGrE""wDɞ=v\0ќD OZT?ǷIwSkUa`U-P_䗧m}Z8b%S}f 1̛BEF-饒.?k 7ozhnR?+gmb"JHopZv>u{Zj 0QFI 8PR8kEGlD;ɢ|0HWj ܨ\8q8wcCUI\@m$3uaάeWVSvښǺ(xKb1gؼ2lt\54Yqۗh߰jxqyX{p/h6j%!S!Խ)'ԟ--10_N1TuR!OJsԔڊ9<(.1sfX_^dGԝٰH,VS7 ya,wTubq(!j0F6$튐{V`{dr>SE΋Ohsp:yo~a70\&׻̑cJI,GoȗۋB^ endstream endobj 1006 0 obj << /Length 1084 /Filter /FlateDecode >> stream xڵWYo6~ ]V uPGHEo_ `$UޡHْ8qa&ofhֈOӋ8E F a$&ZjۦѲ,^Ь')kI#jHf)8J6 ![8i'DM(! 
3[VuRS٫6S.hULVlB%17-eg̨&lKT9`ɔJIQ+8q!,I]`R<p}g+h,n2 "FH0vZH[p̻eyU@.R\\wVڣVqwd!«\401ajAR7(cGf[Z.Zm7nMWFgkouIuc*JtC)KbLA AW4t)s90i@iͼ48M⎵0+P)McAWK[5 (gBmAkSmT#@bLqfe8Nk=7X}f$Fsŕ>Fu=xjU5@$I?dZpκWag8N~d9ץ5 b']%W\pTh endstream endobj 1018 0 obj << /Length 1758 /Filter /FlateDecode >> stream xXYoF~ׯ  6{q|A#@/I%HGweZ ({[{ogoVח~(TxG0F /zuK!{' 3GU:8Öjv3#@bt@g^}5l`+ޭ>8!(u}}lZ|'^C_XI^{}W8U.hA|XR#ğR=NJWׅAg$;}>?b`/L".\LLy4E[Ui[QnW:,r.}7ay)O#s|TUɛ֎Mm]-'CPL u@I<7j-P*'yՔ.8fq]6[փhKsXPW~ZqS{fi*e D} JU NP]gɄGQ }d 7T6g#B:缮pnr"@:˜[Å̢Ï<'˿5Y;DiYM)r}Q;/~FCTYCe&kZXۤ}f!*CDc6wzbBL^|6 ju u> /:4o-i͠$Q_ᙤ0"o1"H]":laHzs䧷3o''`r[?@ua`RNrӤ*al@a\:xB.fl2Hm8) @HgMu s(L JtdVu FTkvP8x-D/<;a뾸ש 'uNO 6uVA%d)0UEwոm_Kޢj]6SEIDEl8 WD6Gl(`*QZN)S#'uK8.TN5Z'P>in"{H'.!M;xwծh2Oteju,mԝ*hI Ka^~5_6`K*@{{u0ȳ' YMcHq03{i- D0] 8;Ҹm*Zӭ$Dm>}28DDˤϓ UNl&y40H2:ht=V "b(ɐB>\S x qfKEY)=Y[]ebZ ZygPBksx!^'wy9˾|"+HL7mJ58lLQU/^H}wt a'iN .W'#ܕb'IUaZ @ cN-ΧDI ""s[pRwV10'vpxEdZbD:9IE iFcf~ bIj.jgW ԍ[L+~I^]K<QeV@BOuPua $΢Ҵzi׫4t!lXrmyYCYt>X?Ô0W0cesĈ<b:Oo 3 O 1|Rpm8a߄Zs3묃o i@/V> endstream endobj 1032 0 obj << /Length 2145 /Filter /FlateDecode >> stream xڭX[6~_! "D]Lhhd! f92e%Gx}/$l+x9׏BtȉQK1 GqG_~xvF@KC wH5]9}˘.xފ!7I@`uf Z<. [)+.Y+F詚o9^k}|<@}qPRbJU+sIev=۱lIme:%]8;Mvl%Rmz5A!-<b8D'3k>%*°;:+|<E7|0KوJL01"40lYeSq),RohM%e ݵNr;e, ~^EdR3?km93>S7 PĀ#0Vz>s1RJE#.\Oi֩L0#ohi@kyVOz4D\TUmJR rU6s#cOk@O-rKayGf GQ[] YQ}],`?`ZB40^q 3& A @#w4GiHp=O&3>y& c0 (/R{/T. 
Ț`WZTed #yدG#**Q.FTG}6$+y"F>y^r:!eVB.}CCV'Ў5[.+O*<+t7bJ^JE΋uq/tCLۼ{^*knwP߉ުoWQ/x$bnEaK(>zL{jUNNmh(Odcx5T"Z;s`H9 ƶ֕'[C,M{`PslsrSI$jPselU -`6~Mz02:hB6{(3-O6zȱ M+=W:% 12i^t U,^dD)ۤ/-ϳ` k8%>gHl eRSb חWa2y;n8HjMEYDc2{F;QM3$FIu3',rL7  ŹC71Wb12qvgϐvI!M>@#Qɖ^-FdO> ϩ?)dfUR6[I0jJ&ֵlwm2}/2Y',hʟPwik `mcWXVܖj .E# o2g0XD&3@9ҰS3ׯ2TC;j2b+`&sՒ[]Sy6iY娹Tf_ڲd/%b8(K|hAϭD*M¡z5_]Ya:@u7҇/4YA0y endstream endobj 1044 0 obj << /Length 2566 /Filter /FlateDecode >> stream xڥYm۸_jY􊤸lkdɥl_ΐz146Hj8< - Nj__\I,w@Jd&R*\oQrI6@y r̜Uu}Ɖ/$ɾ D Fo]ehUC]RצOUG=Gݮxږ[y}{ihpL#lj5vˊX>]R.?TU;r~k[%eOK,l7>+g3Bʳ'P:Ci5Fi5ٽPtu s]J#[Z]G䉺@Efe8 G϶g+G,&a0N΂qЖLXBƒ?]1&za# ;-ꚕ 2pzmNuLjx Vχ:\ > Lr|$.ua;/r.ʃ(rS}2܂Vv `;|-ŒXCHWIAaNGf=e&աm' ew{^2IN^{*Y $miUHl S^9%3/'lαngA% }.6$.\u_ѩYZ Uc^z/- ?4*shxlg*y%"Z\X|(@Gͷ.ؿ,<>/ßb oψayY4T[LOh`yEԾgR{i(ROu.렣̄tHh1?{"%ɛٙFoX s(y>Z+C |y2JIFOfjL7L]ڻ#kP>䋁XRRIO>#=~}wA*Kn(dIH65"8>Ԡ#F]=[̾X8k`Ƴuwf di*%%=?DV(gĞݬxp&Ikjh+d$[v̈) <מ:w3Un1ɂq؇iҜ w~ULCh搘9SS "#̔7_w9_Wr ^]CsP.[^e?RCwGd0HL* <eSt<΁0;(<)|XC5(& _*ń8.cQ)jqoGw99C_'d14W5(^ ^+˅Q#TڛȉnkYF%+~IL#f_ǒkY#Bz>ِui꿥]guՌNMƝVm9dna9JW+p 0eU ic-d~WjOeRIJ 40<RބZfPӗWF^*?Ԉf)Sy"D?IJL:0R򭎗nX)1fLfaw 58 QgZ{-" g|[b ґS (|sFW 9V~-JPƇU endstream endobj 1049 0 obj << /Length 2201 /Filter /FlateDecode >> stream xYKoWiMIq @IReI]33/il7iwc qޏotq͛ױ^bb/n F)2^$1%Jf9O]T_+3.)#Zn4v_K:ٻ*!FŊ'vҟYׇjH N%zYZ-W\'::~gH_80roz"C$;PB}D+nnYLb&f!]ݒEy㿺KoK<ݛDem=4jcw!8Uc({Q0'˕4*viu=rT\ .{}>^( ۏ9 I;%iXÓW(M vh󍷖uP;GNZkmܒx-?uCѺ~XEdVN+M$u6Ep+9vEe3!nyv.88[;5ZڪY(ΎdظE"P(\Lu/wCI"B'}39Br=AJm Sx6eǙ޸k1j7Wqoސvڐ Ao,M'-6C͐VhUh. /}IlQDgƑ͍sJ1DIl\e1jyLyD?Yp<:j{j\-ͦE]7}w!P$7'EY9&fqtClN9t]U5X)!K3Bk-y".0eDXwDݺ==&決3ڗM:gePyFRp"$9Du?,UCɁ.l\PڴmVP]Lt 6._T-ՕI"-faX4Vdx1|7f-wk^V7]Oh̐-0݇'G)}ƣc u[ÈD7 /q'G^Q"\ ^\?@*{I7mS .܎Ǖ[|g6XW,^,:@=Efb  u;ZٽW8ģbӵn>w+L2bn?oh̊S|JO1w+{bq]"ްHq^Om Ј Å=8YwjK:'آ'BvuG'FO D?"`?y.H;HPpoЕ=Cd /3$ azz;'aSb$x&hHA1iuNJ 4Dg@9}  |}0f夺y*m#-9&GǍ=5>pf/U|MWd2mBl}wR&|LӓMhoҹkYvh]pOL<0D/c*R0?? 
p)؅؇:+6oP Q%Ƭ*!& Xp@+Q|1~TC8n] endstream endobj 1056 0 obj << /Length 1510 /Filter /FlateDecode >> stream xڽYIo6WlPd=2zH)#1Yr_J\eǙ$.{gO38◻[/ v!vYCagv?zӷ?o}қ N}9f<}xPm}+|Y>n,F.iAJ4-o $V> kZF+>'$ }qdncmtIʯ,_,-+-q*/q&T(6)Jq2-Cv-_;RI$p-Ga] "V|TtoG76O6ޜ}-vJ,-G1iak40ZԂ~v6eDҙvjzq)>K.%>͑ z&7Tail3K.; vPΖ"H#v,Bf`1FfaO;7<0ya)pkyw0*mD3KuZǼAZBdF喱qZ8 ?Ez3᾽IͲA9vNEU*ej0=1G"Kf@}>Ԙ`O6UV$:U@/y^[TMRc LrrM6qє򓡣XӫSjbkBƢ-q{?@Gz1TB6 x=+|bUѴH,Xӛ!/W8#Ҋ*|}uG\8:;Ne˩S{;Ԏ ҋu+֛Lݖ+]ґ?{BK͡L@wM2&b&:ku#OgQhS*'\+.i1[*)w6^AKB5:ylndbbB74M5khfeϣNnUuc:[²tşV"h#nʢ3uhVzgkS3msZk2m*UG9 N?FyWӤ +A8nIx;ropSV9KW 7pFhl)=%FobŨ:ќjbt[oC\e e|J3~j@\<%hV endstream endobj 1060 0 obj << /Length 1391 /Filter /FlateDecode >> stream xڽWێ6}W $%vH-K4MBdewx%[mîhq83IW/O,n0gʐi2tψ;#g}Lw`*28>sGq;}Q'avRGn|+1Pݔ vW?̃$[/|%zq*Pz`f~B,yVɏt^,Dln!d&C1ſCCbA|q}=*RԨFVFK;a&f01IjIVEWVZɝ~R6zV˳Nkj4֩?R r$-uZbd[o=*B8lnMR"Y%O2n2JBdhzy@q RwKRŷ4 phh@%RĻ3BȃճcC`r|#q$e5xLfڨu(nS̈́/("{ro ['5KS#J[yYuܨoES )4&#C۝ <4G|s LДK[17W݆T!|<[b';t敞YL^zBw2-RH4.b~%?" pPK7;SڊS&tŵP_8/{R#!F~]ʍ^'$Eiحp]lI޾#tȿ1Eҁpm. _=؏o;l+lSQ^yE0$B@^-k7GCȄ M:'czUl Fɶ2il;f׶mؤ~жH>`3ɚ7=FE+TJIOVE0yz2n`7_Y!(#JٳD'@'1S =EJ~Nm`Vh00DkAj > stream xZߏ5~_Gxqe[Rp#=lݩNHmw;k;fiS )4Ys TZhs` q c|,Ai 9Zz+ )$!c҆ aEB A1{[UewUL\ ̭Vw8PZH 7]V,*4 d /.\`kP;Ap)VsC5 _+X;5lJ ,T];6HdnQ⁤ Ś♏`)) B*< / ?0a?'A V=REt-LcAY HP3 ">%HaQU Us zb԰/B>V*'Vٰ$sfHͷ?q`Xo~žc~,HZe &nQF3MsPG3`OF3CϬXŊإcq(P$ %ݺs!JX ",q=Tbk `+Xv,Rȍp7Yh(mgO޽b__-~ͿZ.XNoο:p s.V!n5W1k j_'Oe| jyvyLXTqև#< &aO?##RKX\_yH1*K"OFKYj- Dpb33$}bJ \jX5m`OMxsn~faXlS7a=x_atCI aX%Zpg+L.k,vKb9IZsCDY`D&}xwU I$fzeIF0P/s5"]ʨӢكlv@`fJ`-!N$;’UwW86/Pi+{kG|n`-5zqGgǀNYxt=<|98ŃfuLJdR?{O"QIQG')fz* O/V> );-6^ y/^I%EY7#:z3&$$FV)#>\~q9!#NJlNFkLhģpdX?(4M#MdbM?";0LTgo'L"}K$;˴M=NBwg)P^,1ts+ [,)wj3hvX]JF[ISwdxҹMzYN[@lُ&iK  ! 
aBr-e> stream xڵXmo6_!&Kz-  [Oi2mk%O/I_;EqC:8qb:L9(%<8$侳X97n?.~~q%#N$Cxv]+Z|3j("uc5Y-vB^HϦ3yۈݾhHQ;$6_.5AZZ?|JK>wŁL4'\]IuD0LI5"|FRh}]X;fy_˻[ԂBl Ǘu-ʍBoģԇͶ?4Gz%:#9CwƎٛaC>iJ|:nv:+ᄧsXwNy7}HS dh@(`N񅂈}O҄}=+.Xj0 \JX:nW6̏R1_cCrB3 ͫҲFc/a&$ EE3u` ij3J3Dvh-O[c5|=5ĒS7( kPv*+boŠqZy<&)OBi gD+ `B,#x v[洮L 4ڔت0*3 %u!?@2~IǠy4I/^ e1}YUW;N.id휹sAwzcp-ý5lYZn%V zk$oœ3P ` ÏN xW*r5LGlgOCyNK-wzP$TS5 ?_K0qwp :uEs\^NT҃Ą)'x<^7o>ec3.w|ub\.]@4]o7= v2"U7WU{,U hjIjbn@kZMj9W=wl2[Z൨M"z=4&AXoaie 9=zȯM"kTC90_8o6<&$'h`T&+n'97tg endstream endobj 1107 0 obj << /Length 2206 /Filter /FlateDecode >> stream xڽZm~F\khһ ҽCXp%VW"dCjyW+J8gYWn]ULu¼`]ݤESZls~<9. #XLα3f"?P?OɽIa@∶~T.Uٺ~8dʛBVx+U{w]xq.ER^h@8ngRࢥy&Ewk}USln>9fr?Zm]0pIVe?Pܔoܝf4^!BfqC1o^,xı&#V8 uC\3oE :|9[a; M\glrqLҳN7̮^O ;c&?7 D BODKLT" $tglyk%p07zI>g1BDKvhbc)6 )=)AQnz&Nz^>ބ~|RM#PU_[vkUJ ĘvFb{wښm:vO]/K~"`~0}O[,Rm e]CH'NX0`/ߝG sC ƋGm!KAE2( qb#!l"8 )Jw,kei0w}κ<\ɽ(q!wOҚα:;Ý o'% FzfK6n5\0:.6> _㻎LKYԏE\ ևbp%k)6U" 8l?Gy\ e>q< cgs>;Nkys'.D=]l] VpqVm‡ddER?&"Vd˱ΔL|Zoߋ3}MS;iplk]djxp븎0 f>%Q=:ĴהΪ;ܐ c B?\S>?)0&Iou/5X(Xd qҊ ^p ͻV鴑}ݒ#(U^²goD[#XB7w ^UvXE{DWܪ^G|+UYK> v Kr2O6ejoms@`dW{cO so< endstream endobj 1135 0 obj << /Length 1415 /Filter /FlateDecode >> stream xڽW[o6~ϯ:э4H ]t. 
-Q6QI(Nwx%[If+`99߹ПmfNjW8e(Kd*g(NfG8 gb._'@2JcpLI"-,$p6 N+8?WT4[0~Cjפmќ1oq]oGe 60KAIĭ} A'V_)Zv(4 0K!-/{%@ZǞ2TAy=(^F}ԔIַF4boS5&أl5-'*"mFGp*)Jx[1}GEd@ًIRphbL$OS%UQ҇)I]KvɄep:?ohC'(Kc' >8[e-j?hX{v+YA5m`d50٠/ƅ !$0pP^^(x|-WJͶZѸV4JY+NTjPN1P/`2TYVuEͶcd *[IN Z7*7]SO]kq3yY_Af\I-m:`( nΪ|"3A͸K"E/QG?XoߗU<\m?Uo t{Z}jMCj+?${֮ͣ% <Ѹ\U{Gb .}iʍZЇ7cl .}Ri xτZZ#|5PqzD`;fObZn@8B0;Ml w(/IZUEE֎p [0]kttћOؚ"htD͡SZ4JdPbKf+'-運/c2oLX3nYjȏOci}fO죡KJHŜ\ 2œ\E ?Љ[j03ݼquWIs5"UW7֥C50 #Cx/E%%W!ѬOZ겐|Q̘o0rpZ!V =~ed&ˏS?1kզgAܽ]ÔI44MhioD ٦XcG:x^% ^PЕݻ7I<'( 苓2Ԟ`JRW?4㈵F7Ei$] s^funG蘿;XŗjڢeOóV5-9]g_@ endstream endobj 1142 0 obj << /Length 1609 /Filter /FlateDecode >> stream xXYoF~ׯ [kM4R(+i%ʒA2m+Fzs|3;#Y;zr:'%iEtP%~9qҙU|WNj0m \Ja[2pF^lw5/|z[=k=z$lUqg)2=JR諞|yu~|:>p+5\"o]bj5ݾ2+uI \}фάgfYgvȆf~ޝ㩦GɐÃˈ/ j_BQ>ײ2_}SJ- ak|iೂ_סMM)qYlE:qn֭P(ŕ+sM~<\*A(QAߪcs#iBgcBwt8ݯU7R҇}mx TX?(ic(v%;ZĉtĖK= W?&^ژbM_Q1/>‘*ԌsVr &/ܨpF:ᨆh!VAҬ2MPyei )#è.&G '3!0sM2DHnb_ҭIО y>IJ7ݫ'=zU/DE>Wȯ%ﲍM,D^k1Uөнl4@y'1 'ՆU߆y}m P#B\%EZu=!([Uݰ 4mw0šjUȄ٬݋$Lz#JO<3j_]% E_\OQA AQZ+aKnUٵu$)$X@h\:b;T PwMi-x* M@])==t哂ڈi( XGG7C̀=o@ϙ`H :Jg2ܤcL4’=&Ibyݿל&^aBQUXJ !A&M;JD߯ҟ Y>ۓRA1M\<愧@biy,8>V> stream xVn0V8$b8vȡVJpj+%oaaP=9ήG 05nFw@]BY xAb30)eg;`=$aW> stream xVˎ0+(Hccl`E*M*UnjКiC~04Q_܇=Z; Z/g/ֳFV bSkZBj}kX 2+|I3"FÞLBE!|3C "Z*PM1!m(u=\/oe5e.#9iv!68Ah08K'z"xz0~c# _]ikL>-!]5ᤝ($Dcoiʤ#!af978 s1_1LPgSQEx~|i,]vC;eQϼ6$B*dV >qb3!lcC&S~R٠ɬ,}#&;=2O({ʠd:'>oKzL&폺?iEr^ff\/t]8<@xxL)!<eޕ^y\%E9J^4DS`\ldһ+}mqMj9.\;TD*L+>^ZX0p0: .J'K C.\L4r endstream endobj 1174 0 obj << /Length 3330 /Filter /FlateDecode >> stream xڭ[m6_X3")RRE;4w=9\٢jlɕ _DI+Y,L93gJpp_}/$ɛ Cyɀn_"޾qg&$:zؤZ^E_v}t^_,N{w@\$I3㻻%K-jl6yYu<3) }^~ DaHbnɃ`QVMt(Z7fޘ߯wL,*OW;{QLbhI04%8bA;LI@h]h%䏿|`LtѢ*lE6w4X]fd}!{/Ү02^ng;jSV{ej9V"i"^x0{L,&@#X}i*zH{a'"2nBZd#ЈIdZ~" ⋽A:wyԦO}uW]9icZhN[b˃➾ѼuYU5Op:d聆'j;wwC{.jL@Pb֤=v:ovkX25Y(b0;u*dFUhLPеRͣR IU$p.V4J !/PE$Z6>_g}Mz9tةQWH9ԼV&|Kd|ɴYoGtL"V`GN/{[0ƭ$ C}u-4$h:zʪB+􂆒4]7=Zho_Qpyf`B%ؓz>2d\x?0_cb𰭜\ymP[+eL\_fz;q%&J.W`yޖ9~RjpNY>[MW=+TƅL5i;׎Aэ#~0<u7Nڭ~D kENs qH|稜Fv; }J͕ܴtvDx|<(C{sfLLz.&`QtTB2 / 
db„D<|p޽7O^ϫgM%%q RZ'Cj~t I> U&yr OkSnFM*3,J@{;!Z2(K$!y2pVXuy/͵MԗW'7"4$LEQ6ʽi &}Z+]K&N>j#q2ףbޡˏ,cދA!0Ve 3upk^ - E UavBGNfr ޠ^\?. Qec: m0cXܯ+ _bb J@M5*=C^ʱp76՜7Wkz=sMtjmkQv9;bE-m p,.@{)ϻ$n6R]G ϧ6  hLe.Jux9,XG#-3}J AӶ<2a[ݻ;U<`z%!\!FհPWZgS&pÌmדEtu؞KԖZPe3VA|'J%HwÝE`KU3 F5~*H"11 Eh@"aW_9ҸN4;W@ Չ'N$rS;b{(~yh=n |@Ұb&Ν~viVRpY~ 亃 ë1ZLjCeC8PEֿ+/߿i-D=h[b PŸFhm^n-mcHEjuZy. `R3S0u=TYeZ+p`ESkymF;wxZieuGc2# %` Q]~)'Eօ[cSwk(ŠSח<%Yybal1/[|f=_hnJ;|jԜj.!cj Vgb]1E~|7U a dX|#?Ý6B`gČ0J/N(hÊ: Uc?cIg-#0CΆIJDDRn" =VZc%^O cz4SZ3q`X^s Qil4<źViq%^ PVjbMkN1j?ej/yqy3VX, ]hϋY2~' 3Kf#Q G;rYP]Zwl ~ &c t/FkFjSchvWu3Gx.X !Tx2 am]687^W4gjӦ-.$}6%{NU[kt+n`sA<@ X,$9x/ߥ//dȖ t  +bw7՝ү5QCrȘ5l߬T _̪ppk"T6HS3fƭgrgRqj? endstream endobj 1069 0 obj << /Type /ObjStm /N 100 /First 973 /Length 2159 /Filter /FlateDecode >> stream xZMo9W(~]`89쮑CGn;ȭ%c(mVԎ[`0ڪ.>V9(VR֓b+r+A9"rDʻE9`5D`TN/t?> `DdQ'<bBH\e (XwUΔyeYD &|7GV`|`12c+YyC2nf-c Fye v) ({6bIcpDS){+ eFpسjH)T`("?Cʊ'DE YHx!dd42K*2*Zѐaxèa oU(o 1nEW(f EP"+pA  +U:8 O#& $S-J"Eq@*a"$["q xBy>'H1R̔ I¿}\A Z(S`|[n7\~T޴6j?4W'`hZ5'vz˶o'u.`i5=@V[vgXeg ){2{}nSYtNf?/Û5ܖA<\-V K]?Y PZzR.;}l}vѬfa]]|8%$Р&#l!`~H>Yv !䑘DԂ!- `۽Юǂth{k eĂGYͨU,`3Qєuc@@ݎ7!bm!@r,9d:B@֢)IȔ B D\L"-}QEA1\5:mY}}r`,{;BV30-6ns@^JD"Ь7 ~{A\k pQ.\ob%} pbywuDA`3ddK^Y\O#a+/#P\M7*B2H"(G:pxiP|p8,pD(Fh)룬FF;c>y@}廭NJ:6#C/?\,"J:BCú˻]Jwkt*r;r;r;Vz)ow;H#OٽuVC)#{傻Yb:@)$SҖIE/TGc9h/O9B#̈́eܷ#ryfQлot Iڒ|psi.yZ 5sb3r". DKw&f~ʞUm!m:n2 endstream endobj 1179 0 obj << /Length 2395 /Filter /FlateDecode >> stream xˎ>_a%2戤HJr f ۛn6ֶ,9zL)HYL'H\LHb8{?ӻ2%$LΞ63GrdHg`6iYQ"}"1[ޅ?,Dr e>?3&? r!Iق*B8b`POnfbAV4X#)4hJe!(].4>+q ʭ 5$I2&WQ9ZѠ,,~V/5703R1#aCVl[mF]4{YCG@b @@ -Df?-DR48 4ODaz;Ib7]yЈ ʞ_F1bQY)# by#\& B'lS3_=&Rtb=%&wl! 
+f9"Wi&KFT]BLD8p o%Z)Qbd`8K0R㘚!~U ΫBg# )_npu ̶s6DX*5a+-qsuoc BXR7Z/GO0tp؇$pA,x!cduYR:v> >օqYR;k7(YJl |6P{._K.e.ӧ ec ߒC#Rolr_mTuv6HB]Q$eUZL1925lf`&G n% `c!Y㢍1>OqiXh60=Yb^Ple=inlٌE7{v?j@V zYhU[BUq<M ycP̨εؕ,m-6|nͦs5 t'lQ8g+,88n78DExdB !nǁ"+>(GuٜwRA1UA+`sO^5fy}A!k9 P$aoǁ1GOEy)Y>Ɯ,3>W*jzI=ܗLFlDW9ƾcIJVhY-VwB[郴rƿ%vKOtYO$|1כ06}PՓ-I \NcRCgl(~ǾP|`Z[ڵzȾlks1:|nn֣V¦KQ @DX0p_lwv"|n x:zr1[46IwB^rƃڶcnN4Jdk3+ݴ p$9M7O6j>]v[C,&o `#houWj'ڷ7-ZҔ+uwal} o#>t,"mfqd1eyiEހWyBMǽ"TAMJjw-EKv.QOzD޲kLb3iؿixqrs+(ϙʈuwE+*E$޹W4 m`08n7ljrX ;Xiڊž]eܦvQfjMl^KݫhbLzhʴ]}",D3RL'E I@礘P {HQpbb_n#8jhE坵nXjMx9dRzwR= ,MA9p)D^~ J]> stream xX[~_a*kE]R )"EE-DLdɥ g(K>ڳ'( z8͐渉6yË/˪MVyoEa",7 z3EX%8hU{ "=`_ $Wl;w(0[QVpҗЙf'ʰ  Q9%P 4jh8J 8.yf#X}" nnL3f[;Re함d7dmtw}it;<,p'VYFpKEfz44|wR=m{gY3/fވ$L3pUFeNjl.kI?`8i4sR".|ܼ'0-)$ ԗV-LDqWv]- r-\m' Z"Aw1j2H"NP' [%E^t-` - cN! 'm(x!R۷]&$}IF hw s>$ͳ͔Aoֲi$%@v ފ`aXITQ&I/(LEuŪ!4drZ{+B;a(m"Hcu ۝ZH3U? UqxF4HCfԠ=[hJqX#ty i2EʹVL02_9KQz:]솙eX!| U[`I0Vuy{ F.DֶeڠEə$ '(Xh3,lYl~"E qL\'\4cmɛbo058 %g40Dѷ_J:Y\0Y@у㶡!M |~q%.uҧi G^`$P" SVa4 觓9Va%(PS)2D%S{$}>F=!V;8HPZ} 8ȗ9ݺ$KyedgC5Cq/;$yP`=o{BUxs nԖ;w;0.=--#y,U$w^/XZI3wQ9ₘ %3}a%B4r- |^҈zS!k-FH.nсVA5(BGHY[ ({z'I]op[']lh@j÷F*pFIg" Jϥ ̜ᣩng%N>SԿȣ+z`3M.K\,:%`(~CV2-dAﰍ.5C\~T8@#>Ww :C1Lͪ2qprp@2`p]_M%-HkjRU!Wb|2j][8t b_kxn)>peS~ w}JVaq뺿KjE^kB)Ў&a^϶Çk1~ :yVheoGËHu endstream endobj 1195 0 obj << /Length 1644 /Filter /FlateDecode >> stream xY[o6~K{hfP< ]k8ޯi^dʊ$/'sίwWa$ Qt].e6%ݸۑ-Բ} n ,{u $z^ADZZĹ2 уBjkMz޴t$UWc" rnCdH]amZf98S+2ܻ *Y푊W:ovry?(/ZʎԄ#JҦO(k0 RHc .i [ІQY6(8Q+\ofQnᦋb[/ 4Y)G`BT6/*N旤-7*] иA]Ԯ:OLj*RLԒ{@PA1fӯeN! GpzO>um;j %:vIɔ>a?K{+`&h,xhfLօ(Y<(xצwuP{||!/ =QѶݡ>B<ք-թ AZmX<(2eզp/ k*d֚$͝Tݐv 9[N:ؿPFZ^  (F7V EH|(Økl!o8p[n{b|,!3@iP?uslX4bO`WHޯr3]X%/orNֈFǝ5NR܋JguKRJGTZGFM}~0wX ,%Uqce0扔3@:i粡ӍY~!0;OuwƺLə?|4e'&_N?rz;r|A 'R5NA~ vz5!kIl3wsMYQ zJh*Yc|m!u}"G!o z0Iҭ_MbՖVN Pxzr`-$"ċm8*V =s:xY㸫m?0[%T0,%=Pc݈UD/E $A 9A|.q/}nŔqSQ> stream xXY6~6 DM!@}H+kyoҵEP X,4[,~Y-_؊QXcQϵVkOׯҋ}D59gX70/wz K\l7;`y}9#!1”*FJ#Hb.ʥRlׅzu9 Nu4'|ԣYp81 . 
m% DLݞ\-bKj4jtDFv%|2g!^(^wKMmhU0!45rzJ\pdHfD3  1% &Ʈ 9SYeFK1vMP.g bY?*mHʘlbGemT@ޓDt1 ?+Q](_{ͪD)0*N:'xb'st3U_DZ \NJy{e9Z,UthXh> N/8 &"x 0$<kǫÕV,GpW>arC!TQmzCи\񓣞ܨ9Ip PB%pɢVs#)#㸏bwɕx曖nmzuJc5FgtAm-> q0sĈG JsbD3ˊC>"s!quVw9_*hML\p\. ev[Up!)W(Kn~JEa@dAK6;wQkӒMWS ( o:ђeoτfb(m[MݧG=SP9idtj*i2ECK.ٮY.XǾ"gVU4"7"*BGA}S2KrL;׬fi |5ƫfx>\gp0S4N(,FtVnhI+MO^r]Z;Zlj&jSIn׽\sv~ػ\&EU}LDI2eq]>& 5 ӘmNJ+lY@o "t mj '%Dh`%ŇOZGyqdzd+'VZu),ttM.mj,6mA7ye*^M44%!do ʪs41~&7ht_)nWzԣbP5h>׃uhKw7} V E׊q^7b{*_W&@f"4*Q4_}܅@ٙFތzxd>?R=زjyPin/YLdU endstream endobj 1217 0 obj << /Length 1586 /Filter /FlateDecode >> stream xWK6W_")nNyh[,)]wVΦ4E f ~r={v%@""Qpp"ʢ B) Tm*/v 1fvFToN> _K5{]+7jr쒅cSAs[)!o.< 7vUa'tqVWBn?e(v،pj^4 7*߱WI8y 8B"!=oAkܷsvUڅcZj٨#[NGJ@=ؔEΫB5L[*P1/o.T8H@`\5گMqq @HU'QtF+G 0Cq(A|e$1[wмfIi,gFIZ1$?" X !1~6j#& cf-Ui]^^EE>^IlgY+\>OZcbz(n=!J~,veFe:JnSUVYYiJ |~lnV~Wfew* ,MJ eB.Ӫd^uTF[^7(!px`wE1p1FuUfyad,<$Cn@O?2.|fuT^v_Ҫ~|ERԩo4V>zdw\Q;T2;_WKx!(6Lvm}aLs D#{p67'tSh0]u)Oyg /zvOI3 TsQ  ^J&R./,x`?FDb^˼3ӍLZt̎ԲxQk4 mbIdt ZGW9H6vlиʍY״JJ6#yH)C9_‘tuojg LѢ-&a%a21W?R]y[4$]0FNdzsw[~( MOvS VyuNZ2GC)0S:öi]n?=]+`q8J;86%4CX8xwtWZ5xP>mM4u}=o? = puUWBngUJVnHAզdoMwWe(!SF50D?Wv˴Zoȡ[H uNQ(s ֤<#1ޚةNmR[{LCw*` 9b(춆O7wiz`⠝v^`|]yw(};J dY'RIGp ]w٥cDai+·wCF!cHe͓'?x ņ#NL/:KXϙGJeY oHrRToK_{O 8 ^{O&~ endstream endobj 1232 0 obj << /Length 2968 /Filter /FlateDecode >> stream xڥZm۸_a,y")R H] .{ ٦jdɕ$3Ro+FEf~~{S,aI$vƃ0(`JfywQ[)b0us2+p_8`B08I.`^.2sKVx1oCnhByZe*7Kj~}xV! 49 axw/ !,^f} NtϚY?H}dUpܡ2u tjX87 ^{=C|,RX _%]鍗E(k ,ω}ڬwj?S4c&#)G풇V'v+b3A:=G= +lǼck3E&dIؒafgh.㾠qMMô2r^CZ1[Z3:OpYy]\M:ьB5(NԉSʪ!>ØŰ;ovYyV7YqO7ei O3Vi/_<RdGSnGOR$,YcLDUʐ BLL-=d1ͳa)/ sдCKJ.?S 5tL 1}ڔ 86gT7l׼qh=bMfymrx8tKמp;ЪaܪY iS`捩  wmrڛKt -n<#.c"vqR'άvc\J!ä #7_w~ܤ7mnV7'GL ` / Z_BwA4' :w% k& L2;q;.e2d[|F0d"fB^Lk #`vlh90-ah{ƅBIq o:Kqzu82 #51$3ޤtYWe]/A0UJЮCeWUVؠZ EG>~C0I t$<'lpMvT`-:eta@lWo9:K~J SLD&Jv0M) =/ʆ۪ӈ/ TL&mR7WAujvAgč,[zR-Qo)P}! 
—#ת*$2)ct/@5+l۪0B !uדy'/K\pKDnAi5\3qje҂`x{8v&Fpd { P}%]suWS2%-e(QPJ1Ʈߢ\DTk[R=VͳLyL 'Ě;Fz27N`jw$E̸hL?dgG_7ZY Pܮd/Bs |S"aBЖ#%א^ӔD]k‚}"]m8sA1ÖY{Bvt_?e( qP]no{b>Ba>L7SMY~؛=nnˡKJnycuٻ-+ֈmC˕ 'FSmA/76 8wo(JKt6d٭8\B$ʱW[OktϰXTԝRdR;[aܦ\$UkY) NSs7@i/0 H3GY{驣eoCf:Kvپ 8iAYUu?@ٹٌR +DI1IŮV뻸RrW_$8B=|dp*N8‚++u}Ֆ1uN~Mv}wjcCpXI`j+EѾ؟2Q CMG-c4:̔?Xokl;y[>n@xçBpEZe !mOgHDA"YgIM@F[AfX³?KǜuLSqkORzsCٴ)[^_܌~te`k3ɰc`SUlPH׮OVѬMh4R< Sh$+63:1')!OUD_o_FN endstream endobj 1245 0 obj << /Length 1530 /Filter /FlateDecode >> stream xXmo6_! 1KR"%K 통-Fb%/;)-wNVl)<ػk{ J8:(qX@u}nk(=zgj[0wve'h̒w]QW-WaM[w񏺨ډꑉ;DiLjDh}IhꍚTTFhdHI0aZ̛zkH.M+LSɶ{Tq1#  OFuNm6ģ(EUJO'NѰuk'FG8LL#{Yh܊];NINk]=4}̞(D)x8V$Bo})d?p|/Rv4w.X%,D!%@E;oz+œ@099}*`Y唡$E+C;J8I72bƺ̠\Rߪ&Qr++]*Df<{hHVJZ]X+cwKx7d}kE=uOhGhgҧn;I;}ޱ;xLJ11,1--M2xʱp,oegF¹UsbƇ"xn$"\L;4tcto "HiM@`oLZ;]̠tkԚՍq>RWARaSni?X4C*C[TY/^f:b: -H+%NUfҙR:Kz3xkK~7Kf5kW8uYCVT7UӾ]ץUآE&3#;/M(zL^U6{O!*9׽a!Mݶ:6MGk*AQoɶ|:,gZ6ykSxLP6%^ :7bCI$J,0_+)KӐ>6蠧H=7J3z3h%:twS&)vlՀ[) v\/\8ޔKGPI-$@x&ȥ~Quh$N訂H')|~۝ۈ#[­({e|~O[v&5D'`sy17;QiStyVΖNI Q ndMk:6ďjiZ(" rjyuo4r:@AubGHkzQk 0Ȯoy" '#NҪiV2n:DQ9u JfjGP~ )TѪdԚW(ft+ӺDcK{]Iu+݀ɠتnNJZH6&U\]O/޼qKB>3Wv7/'3@.Ecug;@|S endstream endobj 1258 0 obj << /Length 1807 /Filter /FlateDecode >> stream xڝXYo6~ϯ0X@"AIآE}شE.RŢcbu-;dI)q8~A/~:L JG]DΡtk$$AУdf#.Q zҮwHԋKǶ`e#W/\c{QĞ5B<-IUSգ}\%yf&zB=!Ѣ^L%JS|̙arGV#l{Hn+^-Ak×^h9*Lu]`D:.!( Cmfe14J:Ff 3sfM[zmsyk&X@t$ 6bM ?іTu+hli~ˡ"-mFV࣠Ch7!@uf4Xꂗld:2 x2q0'c͛W))BrNa<YmnULQY2z$@Q4tDx$AQK z 0_o&Co!eRWp!tY!OĘu&kmYm 6ToyW3خ_uk]ܝo-#ᘾW0}&@Q!&?JSP5nK^܌v!ch<$*a3L(\@)דGAnXm젬":Q $"d}g+~bx-u uq~2<;D!ҳ3p͊JSzfjC6¤]F萾L]ƌEIE$Y$-kzl;!1lE[@)=av TBDQhȅl=qr o%~v?G1oմf@dpNU] y7&(N1*u+7 4FqNX44}lz} JƊ"m,CՖ߽@4'[^vrnYA>E&}Ⲍ(sQ`0I1ehTGGV۰˟?VVMUr]fUCU&P(@+!41܂ >8Kn0 mSvTf?^f endstream endobj 1269 0 obj << /Length 1569 /Filter /FlateDecode >> stream xڭXmo6_!`_lfIK}HfuX~jL)G,y;HR$H>w44mz*Ndqe(%$DDap>ϛﯯD sH&-'JdFW)\.4RВ,d'T2OPoi$hnV"AX<)#,뻮ֲ_,v7TmfR,EJ( 1-FT{/!#K/۞RR'i[BEzoVl;}0ty18VzQ.HLR?/-x/ zLM8g$ߟ0Db>}=wg6nыF\vc_9*e1A[3v}rlZjnn.EVg_Ϡ.rngogӀ9VSlg` 
DYkmp_g9::!P<ꄋŜ< cSx1 GB؛}ۂ`PyM 8+)~ \_sBc6(2:LbAi%u@B߅$b#(wS,՝s JP6k8r^x4eFNVZHtv?> stream xZ]oc7}c"K$EIŠ@?m(2;$rHC1HG 9Qxdd%s|٦n',6}L[-lA a5R5EKxm ! %QvMTd^X٥@R&‡ghu8& fե$-%mvgxlpuC,YSRr*AuL ~H{4E aD-xhLZ aJ6 1$XE8:Nl}Xffn8YmphUdKN (&sNmriVu%3%f ^"f+, Ғ1oh@2܍Z=k9$o|r^Y}֣΀g5m1V̍t݄o>}n4wҨ[onjowaSMH@?5@2˰en)63F;r<Ռ*UJzfsLzkV&| _߼[Ͽ\/.Eގ<"t%FXu}17=a{ | j5v.˥?_;61g,;k"*qSu>h=vP$QRFE#RNDCj5SBIu B:&u+6n^s(<;YHn@jj3J<*(<` VKO;>EQF+Z$샖@VW|>^O]#~KFg0kSKjBS!JCq&@m>I4JI}ô٤e`Q<9řSS\nSR/h/^(PJaLjdot_Pz" *aJ|zY01D`QgoV/̐V'5q؝;%/ȣ(#D|ELFx0YM<" Tv8?~nxpvt]wW'p@CzYvNTH;89uw&IЬ/jE8Edƅowxn/jyHm7ɪۅҲA-Cɘb Pm+(gd1uܪ-;@_mxb͇wb@~U״Vd}e6ݻ@mBNa(v)/ endstream endobj 1278 0 obj << /Length 1154 /Filter /FlateDecode >> stream xڵWY6~Ї+R}p"E$/iP2mձ;$G^I.>$s~shֱ6c2q1gX[qV;ZS?}YzN7`(<_+Y2Oj;q ٭uidqKEՓt?ziC˦ ]:l8ms&:wœ s dSDP͜^Qǚnmz+ l6t%X4Ew4.7j@Ȑe-˪~g]Btuz!(Mi;h%TV'DcSAr+&8  qk(%Z&/Vz(NՃ>T%B0^DST49q'CC2kY1>hob4t0w}0ϢJJSGlj Pk5>#-J) idKUXtZ_Ң27/9*QVM $PVheFT$uQVDWfqpqPlc=@WvZ]+Cv<ύ1&8m-ԅ2ByeW@똬8ӿ! Hd`l%SoB󘔙!0O8zN*?ͼTy4P5W. YSpץ"9gzpIk 3>uQTnJH\NUSA -nsP+o;`Ub8 a=Pl Ӧ>P&s:C7ctD{Oc5 $PtS!IM3YU2ߘbYtT;v]x)=s ;.tvt*5PkƔf6Eā͢IBW#a*ZD [Ms4{Z%B"yVI-itՈv3:ZXJ-"5]USt?!0-^-`ޤ<\Z#6~>po3z> stream xڵWYoF~ׯ[I w\F"v@mlX+eIZֿ%2h ރ3#zqvNd1땃 (v4$uܸ(_Og4GI aI2/G$ P:yCN?9)K"(}|]|9;]cv(?=L! [E>x>j˅ޯ&˶1\&.(ٕMCzc4q)~i 8 (ZEUPV8ZcU:>NzSvBS;y|,{nۮ+[}M/J m;yZU#MZAFaFr6?yyǤvh?B|hR" ֳ`%{z^BQ#UPg*yo9S@g5S Z.FDĄheCQ%YbhҒ?%G2-u8+4s&zxcwKA^D 4y&&P?LtPsH{ sgE}+deB6 `t*8 uqBY+$0tn*2GRfOu jO0ɦMb{ê7 ӋqZYqC)IKixh33-gf&=LҔ\D#98C4[BKn| yUyM'1N\ ϶Gi{ðm1TзjA:Mށk yÿ)ΜqT.JC! 4"Ry( GԼcz$u6'躴?kr]v\h5U`H endstream endobj 1308 0 obj << /Length 1733 /Filter /FlateDecode >> stream xڭXY6~҇+R}ؠImmQ$B+6(eSWvX`yxYܾ4"yQF^l_}t&J'4D4uO7o:[Xn۷oBp+A!XoOkGa[yUXI Yuzz>]fEDŽ/p/d&9||aheVm5-D?-3x?<ΘyP%[cRJZki XEjy] [3Ss N~_{[3u{г<LG^/FZu0=nV/<,nk` rEd;t̵dD?DgUƔ*H8ukʌu%rў4]rw^:Oga4 wgͅmmtzrZp"}C_nNuE+[τO VFG^ȘȓɛX6^43Z#JFJ?S #G ri kK 0JJHFs! 
tN 9[ހFCRg]d`WNČ@38 (?;×wҟ-Q;n,'~B (9rB`jw Sdo'Ӄ EgLU:NugLBՎcmX4x(l(m~# Ge*Kaf{JA|kL?PǂYX.f +E]GtcanfvMVItAb\DI68;-XiHxraǺa:'cF1w3+ػUu r>a#db:YR Bn7Ja{MNyS@b_?e%xq :m[SzTOo10#(J&W B9uo4%d(栄TBbiU (׏78ʊ ]CD~l_+OГc׺ϲ~>H`! >[d2-b ֓WjP+ՖzC}{pJ|+) C(SgHxtd؍s TQŰyfGy`mnV7[ rWFV1qe("A.>"f+T@v^.-|Ґ\Q-hJ+w.Fj8HD!V[ $D]"C(z wN76]CI:蠉OQT ?p:s\<@AWJAR5RI T#WIF{'uS X욺3}T)'_:B)~!hOYe: q\%\;0yvg:BJ.3!NXLO08%(K#l6(xnY.fѤ!ӰWdcua vM%eԠ\L8HQ.~Jy5bT\+l@-MDpLQ6vRra/Țk&f[0 t\eyS 1]{9v5Hh~ .dn/_P >'t(I ۭס9dzVd}~tשFB2=Her_-sL³,? M"Ȍ# endstream endobj 1322 0 obj << /Length 1797 /Filter /FlateDecode >> stream xXYo6~ϯ06X"F.=RS("dV?!D[zE"3Ùoڜmf۫/W~8HlYIןI<Ǟ-W4y\ދi F(8ɕ)Y퐯! ;kxnvjjw͡ h附,&}%MU=o>$elnи 8X?(?_lIKI6!WN SRww? ْ[QIfkEYDjGෲ:* (!B~)6k^4⤭#g"<6ˉ6H%MYWF|"-Y:z$07x@SH(c7G;u}!ύ4_{oҝ\z ɋ.kj9XUb֒FB=Le!=d/ӟ!3YEE*=^mH-$DTJʶVq?ښ #8P !䎵H!>K/, ,~n>4m*Eٺͤ^4ź-D4͖{5F]Uq/vmS1IM,,[u ^w-9b)*SU*Z<,f͞b _lb>=v:Idq.1ZhХ;qPBZoR+j$ W֓ zU0a]9E05\ahع,9a㈝.-r [n>:E($t9 .1SRedccOJ6O '׶?SP'9&pĕ?M+-=vL^0BMХ/)zS:l?CYH'cϋ% u iѦaЌܨ)ۦwUF ,IYmn+fImS*.Ѝm! mg(PbG|T`sp7> 3fy/РNg endstream endobj 1333 0 obj << /Length 1688 /Filter /FlateDecode >> stream xڽWm6 _at+*6zÆn뾴E8Jbc|״)J] h!ýǽf?\]D,w|Y^s»\{/XEvYDw͸"^ /8c" HPڶu߼Y^񈯥.ȯp#GyK4xK at~4+ ,S\o?w|e77- }H(4q}eYqU\U70QH[Eg. ӖCm(}ςTܧ.c~:[( ",MOqqO;zrv(? 
^|ͽ5,BHXލٺBgI\zg <M G)o1OYw;Ce OvGsKږd ݏ B<9=Q,phQuy[4ɛ&;ґB\3F#_L!4@YO^d}Q?3g!ܹh.f%Pg"ʾ4&ʐr$yriRc dYW3֥UjihlFu"%j:pnJ娔\|EtVkmqGҦ{2$BKPM1Ī;+ԽnzͬǀXb4|;rɭ:  6"ki;];V~{]cu埴mW}'.o.}%1o '!t ?HY;L.VR^Ot"5ݦr<*v\kCØYJQND*9YjAReJn]J(-cXs' $ho$B5wʋW esPv'jWM+!鮗6\T+˻l,9꛺+tjjVkQi-pYȝ˷ or a{6ܹ-b#4M1F^AX ȾDJ-6eJBNӔFt4W i]{W Els ZJÝm),oXCUX(ݹQ.wFq᧖Mq&- ՁԚVujF[MS-?zs xm/m_ky%;ЏFTax5M{!:^/OI=F&+4ӡW#ɵ;Aґ:H^M[eƕ]gMǜ@$ŵ; qQWNN]!eU1ϏW%ӟ6@(4 |L:b-מ+ >E&U'P8Lxs3X '=ω}ѷ~fl |M_3H*XkD'iovE#{%+k&&̯/qybM/:h"4u&IVC4ͯ 1ϡtDKbӅi 7:4Uoh,-p ً%=m%}n%D{unT\uҹR;բ-[]JJx[mMs#\$3tP@rh1VU1^a~3إ endstream endobj 1348 0 obj << /Length 1986 /Filter /FlateDecode >> stream xڭXmo_A@\i=-E;(@+E*$u;K.Ygsٙgf9{s~\q>J9;{ Dđw ʼ)݇5[x lNYyzskѓ7~}SvKfU]Cҥi욖V^56'1;T]ۣ<ϻ6;JDD wadTm#y }+DMӠ fG+P60exb\2f_dރZY{yzqCdqvÇP (t')`NdA85!+ag#@+; y0ǫ;?Y4&r?dY:^0~: vD3",my˦^ ӂl;Y4x'6feBX#ݲݚO`/NC?81O?yyǬ6HMmab|Qp^e~ |.WU?HwliiדI֍AG`Rq֦> ŀRΏ9{!+桻_„e˖K`@ByaX*} bd9&fЁx{+ԭke 5|["1Y6قD<[52*F 6hwC=N!ƀ">F:@G G3Hye hw)(.Y'S1̃ o$eʎs]u+'%n!g'Pc4ggMj#\Un`aDFגHO`0ʞ *UCc"+h8?d-0S*͇iO5=J9P׻R/L퇀{YnB/KYGB7v6Rӷ#"žAQNW@Q4?MNˎE$nl2XUZfڇ ҩl8RSi-qD,:<_=~XN, i 0v 4mv]Gbcufc~{gqecw[N15p2]ѵ?+yV缧C;hpƻy1(wvdR!|Bi3Tؽ7E3֋Ig{cv2fjo 5w%=gq8+] JAav͹*HJS]'s1e(#NȟU0*nPNUWgL3ª`)O$lN|V F¨ WYR TڠV$ם/ 5oF'U hj=Ŕ;s\fh3cP5cf|^5/$Lx+Rbs-tFo`!T-o 8)ќ4NH4V/+󲣆]a[zV_v: ' LǸl@erv49$::2GGn~gT]דT@lKXQ#2A*Z9ٳV%W8.z<\q_P( endstream endobj 1360 0 obj << /Length 1966 /Filter /FlateDecode >> stream xY[o6~_axj6W]!f]}hvUd:H$'K{ohӖ;N$:CǏ J󜈙̫nSˠ2bߖZl. Afns>e E zymIV]k6fON{wY{0nǢJ3pneBEaNof - mhlL׮KSmc::J7Y(WM΅&M{?5< .,,YW8E<= J&H(B5iZ[%lV6F.e[21v`3`4^I'd4|O/JUrE%TmQ=^ {!4*Uܯ)Q:C8Ah!d@XhEykq qFDd`zIG~zE iAځ_gyW7`12u=T.FX (s! 
{ [ ?Sۚ)آ*qzmYJrYpV^]ܦO,j7?i+: }"=s{0;,E80 Gk 9S?q~&dݮcy 2R_6&N rόJ7ѝLA~ab hLMvZX)b5S4,fw[dK „n;\{Qr(ʀ8.΀dy𣴳n˶%F5Ca je+B)VAǐ;0L"dQY$ ;OSJbo]cݖwv굹{KKv: "qG%Q0=m!d'?p5M@c\Kpie ٗKla3ͦ 1}|i[[O<qWw C-o@:{TR8Y:&PsRׯȨvn+VT^6c(o5Zyo\g+N@.8W7 0Ԇ$1ktQ|*5\2$F=$k4܎[taꯣ~ `I.i{wFXT=A|oQ%]f*hE6W0񞗝"ޣڽ#܇MXF!aE\y?/oGrn/6i?կ@Rt;"'7+ȳZ?7nGNbAtͫr 0H(:d_ܵ5B{$ž:w> stream xڭXI6W@̈%QzH-m_Cz2m hy~3Rl. YY(8z٫P:1C/tnw]ЉBܮZPO>{rrI8KE\sWu$ІixEV+$mJ.j~nx \8s~ $\8,rCK@|&6!{ɩaa^Eap1iҨEҦG c3{2+GX3jZB sv17eI0!o8Ao'LQ|%SbpSB15Rq||aXYdfL0Hv'p1N1aS=LS{>&{{B&B)2 Iޘ{7絰GL{B`#oc"l s8Ѧ^`^u;bn뻬3MTMjM-oQX(L+&*WCPMZg6ʑrlpˇ3PIiru@=,zI8Syc nHQ50@@J/(s_rHI=m-%`9jvui$`\SV k"xҮ燜CDb +Saa(,, g=eT+tsVi>Gp q52R*}EU 7Tu]xP@4~꿷U>k_NQ7qMZm :V4YM MI|N/P22pJZ3MOwfā` Ϊ*W,l79ׇo ?;ǔ'<1>' «Ao Wuʞrҵ^,p;Sn 1^\Rcɫ 2lu!., D!Gw.AxU%M/)A<3:.Uɨ4osȼH'X̴to 8E=$}9gZ^"|}ld|ڞy30ba:{͛"Ѷ^Ac endstream endobj 1275 0 obj << /Type /ObjStm /N 100 /First 969 /Length 2053 /Filter /FlateDecode >> stream xڽZKoGW4r/=]U0N0tE5G9W׳c2ΐO ?΄/Idh!z}A]V@0\le@R2Ġs6YhrRNXHq +)%QD=QC#dW@!N*bG"(TKD)4Wga< ƋTJ481HbxURBS["@y0*3 jV|c/J jfv c9pAx`]~!1ݍ[8 !dPdU@3"u'_``o$GD*&V ` rP]f=Q'b`<6d{o%ʙ>@t, CWv4FgXn76n7a !!1z;MPay $e8/x7OS/V^|E&n@PNf8toHڢR]Wj@A7lZ=ƠXd9yb)VG3Ҏ⬃f|sID)ŦdK2[/6RQ_l-PMRIwwj5|k_һS lJ-@|nb|-a}}~ZԼ\~Љקb@)hQbH$%:܊ żE:"CKz|$q;d:_ppbGҗv$AK!p_dLB'vj91?<6<:wwtlƖ!5IA '?rzg{~~ٷz"ؒRauVPzbSd+tb}ғ̢|,dy;yvɓ':y c %ڡ̤|.Zr`y3naCze@u`ыm*JUYмk`6_ϮVMnem sx<;+pVMY;Vg[kZQm=ӯ#y>^u{)0]IoR1}'_vTt^lAHOͰ/}}F ==Ò 1SE%Enl+IR\h [+C¬ an9F@iNNv-,yBY/1mƃhXx>L&__|/-H9"-"vwdK-B('6'ڿ9nu{XKZ<=6phj ճ$^AtL6mVNz&P'>gC.7& endstream endobj 1388 0 obj << /Length 1934 /Filter /FlateDecode >> stream xڽko6{~~wmv؀K[tD;Zeɠޯ%Eq+D{dw].x2L)K#-n6 uD8rYMxۛ_,FD W8cѹjmUیNOO_dx =,R G,MeCw q8RpUS޸+ٖMH[ץ-V+00K|r':0$7ޭGh]g" C>()l1#*WyC#=],WQhaV"[Q dd@s\ YƎogXPxߡ$ԞC;;Ѽ/)ja Sj8Q4}Li7#u^7tUւ4h!7XJQ?eIkoV%V+[{QW#:xMWWPxDe OOa dZkvpڶGW( E΍@@1Ҙs}inW5 s_{#k7{p*=O{ j|;]}<֓"?8,'9`qA|G_ey$ZqGWLڼ{6ۚ[Uw{%P{[s1^)@Mq `)̳lz 9%msgkԃ8wLy>`&lzJAX %|L/>p|Q+Rw6)Ox{j~FE8 ȵyظ_Si0.?oH)`NJ"ho fTFf`&c>i1F̍Q) É֜CEծmrnZUT޵3oѶ?Ljd 
c'8;gn|Rs|w] ס3nuKVp(bYNy4gQ`_SK_IQM(75og=3dR\^_jsi9T=DS_v56rP[ҖP 63.tKً1'd6(3#BM)1FKHz”yID P,= kk=&\&  \Tҩ|.*T2Vn׵ 9 fF endstream endobj 1402 0 obj << /Length 1238 /Filter /FlateDecode >> stream xڽWmo6_!`(`CR@WC;`a芎h[^\ ;)_t؆ <{x824k`0(޲>ڜI:F!&(I2$JeW!RЍCi}H^g/Z=|Q`5G(k>Fr\fQUtWNJPFRs7ForYMg2U%/̗F؈Y3ѶsAp +3 g) H땳ZDй+;w:Lmrkl{m\ڳF kq ~뤾+_JF4D$vhLH Ab81 Q Fo,ONغproh 溯dl8l&Xn]e;PI+i,βKYV =i'U7A3Wj@$+\ʬ,W~=`6reFBaHRxy= {lM@k ar}r8$^)`ONA8:2bZ§kFn"H<cB&FKtn`1Aa`y=rQTd#-KQló7VBA> stream xڭXo6BP@jFG_6 Zc-Q7YH~G)[6n $<ʱn,ze S+%iEֶ\!~Yqmn}diU|Ԟt侕*pNn(P'7^@4d Ѳ:6E 8"iY#iq$V+ BXŲ!CۯY\H}Ws b8= !iꞀ4MSx}kw$07` [Ꮸa =׊uˎ칞6,Kq͌8: miPm% (ZMZ(Sxg-Uy1SCg[s/A\Nt%u쭑&4v3,*Beӊ=gݳ@Q%\:Z叞N}؄cCX B)2p@TaJC0pd485}wU $wkϱ䖏N @S|jS\\254o&ZQ J!kx-َEZ36BӲYm+۝;O< /EޝuRe}Pg6P#!dNVg ;P!(رJ{h%G_z4=d*P4}S=Đ*z =P)&Z0V+pYEu đ1$c$0JD*P0:mi.$5aR}Kq Y&⫷bіNR%cqZXIer1K,T#Ӡ̆VLCEW"A+.+w%hc Kj-bC\i|L|IY@e2 _lp4C)EjH' zz15 )'ehpgnNd0 ?ph5Yr}ՑӔvx٩1jҍmZY` PԯF0:3ڪ-mL2 sG;_sP6jJ-;[,n54$|x*U}ˆU٭>h`[dUoDž]}yT9g|q6\-KV/h4Uˠu쇪M,q0v{{qki|TD{yO]GN#(2{S*|8{?2HPe `fo׶;~A'+*ĺS[VF'oe_yݫ>SLbp$"N *&y%Y><#uY}YZj00ui:D4|q|JviceS9IH<~ͨ/CʯfSalNwh??_$ߠe)(pig nUԏ.xk endstream endobj 1425 0 obj << /Length 1792 /Filter /FlateDecode >> stream xڽX[o6~ϯ0ڇ@JK/)6lz{i([L;jqSi}!f.^x~Ƌ%!|.p.">^G0NwlW$\E1lErap畝‘3K醍V> ܡC?ErE\A&+ο+hQ^Z6 e ߨUjcIS8D K\gIY+=u0<Hq'^.鲥Gϒ񺨸Rf3AU&]"Yo*a>%0+| 灳Q:nj;<ӯlI5B>x=0q"m3x{^;rjs I;*.LҢ$ 0"1Dn8"t~TL@V~ ~}B p95A1l} *?/tv,êf%K)[H (`3@ʄz]kҮ~mQPfSQurdeY}+4V8s{*T] 3E8CaD4 GKc4;C{Z` cg pLVZHoГW=N!=` RC1P%a#d<rޙm떊@^Y]hX9} >ө(>a!ښ c6݊0H}~ Ybi䑆%U*-uBwl@ȃ6CV; \tF#цq&tHqaAݶHf܊[5ڞZaSjPA-2fKQ3Ǵ1l>"a8f 2O!/NZV 5FIMdUt͐h *z_{?愎-5♕W،MYJU6nؖ*HU#8}3ڎ2 \;eʪ(Ue՞xXhւx5#&s]&䲬ǜ0{ =ج4W}iP]#3N$FK* '8'j @9[n͙bRt5IZ0LtW-| GKmv;9IKqBgh>(àHOU>Zㅫ>WB4^fjӔTEGpz*%ƾ홤u{3>_ gw ZojMԷtoD{C6.tk?j,!m9oYeU}j0JƾGOCɜl:r(q64mF!HSS vvfT/]0Qtl`7cv[6т>p8vcl&Ґq r6凯ENikɨ}ˁKnzg|1n 9R&_ND劉Q8yd":׿vmY_? 
endstream endobj 1436 0 obj << /Length 1704 /Filter /FlateDecode >> stream xڵXKs6WpC @=$$|J2.MB'Y.%1>];v^N^#ߣ\9c2Fܥ2r>O%}]'{OG9ilFtWh _eؑpc>3B'Ul.._0q9xZfvr;+ /* D{9Np v*כEuSEg;;Xw*2բ/Kˆ[}VfTyefV0VgBrk lTX+HwHz?XClozUcL s⻈R6ɋH6bvϞw/#>RvGmkՇ71:cnH nI,.X 5kvN

_ .ykKw5_ Y mD OS C-f6)-% eC{!;LC $t,Pۆ ZkMUw*Ԏk9{$ڔҗ5 Nq{05e~M:׵j4 %⠩+dr|P"#y'r6g9~Prx'*.^!}}IԇD25K7J*qX+Zӽ5r.h8*`g_)WA5VM~ƀv1Rc 7CjwkU=`2SAmi `Gzؽ/@qI> Aair_d.Q( ?%o΀QC?+"hAܪI@riu3ܕ ^xHWEpFo롾oN(M|]ּCËza9sTgy 4YgOAdm}uq}ۏ{&|$'mj!k|Jχ~:tb9_ endstream endobj 1448 0 obj << /Length 1376 /Filter /FlateDecode >> stream xXKo6W9@ĐGm==5BH"CHXp8iA?^}Z]އ"!Ij`@(D|{M{?V?d /(#Q ʬ`̌uoAa?=_nzH# Q~nW~vw_R{~aV2UiU6R1]ܺ.Q597nlïg/-2ݨV1x.xtg^0P9?P:4N~WlҲ㲫:+2ouTq:e(=6iڵx͋%ӡ1]HVW&.OUۺ٘s{[̵g&cAZv 0LpBywN5W|a蝴\J dԟKBCqDLHw>.Hy{Rq4mtZe#$|O}29BTڕAL* p#=p^"@Ge<\jsFR v|8 $] 5>jkw*+GZHӷ)(rhڈY ݺKMiZk7$:ӗ%֣ږŦܴ{ f! erUA4MQ4̦]ɒ 9yڢ6<0wP fF9Jσ i(_Re?lP)Na`ڦeHu 9䲼У>`cBIbz4Ώ`9݇2O "9?Iit_њyR}(!:\L ЁX][?c\ H&C7eˆO|z-̘gUlq-rVNfI2AGQvq-Ğc1{vl ~]Iݹw8?%tQ_ G/c@=P?uj>2p}yu7& endstream endobj 1453 0 obj << /Length 2136 /Filter /FlateDecode >> stream xYo6E\))6EME!KtV =6C8EޗhD<3Ww+/o_*&dru[Q'/ np+X=H $^2//PF@X"UHH/Y+cBy #6@Ask^Y/=TJkv%1tUB6Wv|kܳ14 L8UHTGޱjUIQ<xTexUOQIWym*4qU2R-4\mm$8Z˱HR}b9"76<ޡiMO-M֬kywTj/UEV2?x?ikv&ok v*h/|E`4J{0xlp SO'"4+.hNCt-|LtƕF;.TUI"ͫVmWL}ț|[<|kz˷\瑛߽]'x{ڞ.턄FSU_UjVZ5rY1GjbXOIy1׼ #DW6ݫt\$o]7tG4nBPD sb")byXD&d8A(^$f7i^"5&<|͗n8'!r >.dIaP3t:^2&7yH3dkكfʾEr I,,6[[ұP3}Ϙ8 RuU\$j)R/S;TKf?Y@Y2ߙ'2R).*90~$ f> BL )tfˬߕU|bϜ@dtRr(۽6 |KPumxj3 /lu|†w6lb06x dۏܫԉ~jZ1a!iFPܐޙ> ,ƥvDeZՙGB 9"i"P6*ǠMc$'X _CjarΏ(n]4W]{Z~Y=8\5I\=#bX%^*KlC{Ha~4>;|%%po?s& b5Vvv ⓔr|fO]5h bEtyc`* Ź4х˓JZ% K>Mm¤OB9l&鶍rn˭g7]l'q_ih F.14Ze4K({Lب^3fsq]WD@>ݩRy`>8s}TRAl.D&V:* 9Y6F.OPgS=] E4·JM˪ў޹Yffuy%O=cm؉hhƓf@e~v4I^:'^zN[,/t5lEdMPsa6s+G' /9I:wecp‚ϫ-6SPGݬ88c9y-0P\E^مP&16B{WNA w)nkT-0?5Qgb,]gihaN~3YC>W/,^/Q6'#}&vX endstream endobj 1468 0 obj << /Length 1785 /Filter /FlateDecode >> stream xڽn6_!d)n@llѶPYr%*;nI:/"c[k˶~|+$!gܺ^YԶr6f]iϘ7R>]v6%~#`xεrΝ0Q}ܳ雃cT^UcœJ2MJ@;)NM?H6` SBB>r#3h9aMǐ؃`)7"3dI=.#C(6s t7hdl`-Dr) )bfYrkNmte4I,}U(T"dr+҃W-3uWP )qrBX晌,֍锵,"ٜYRҒ̡[O74_h\AQ6@#.s.R/C]:O(D!}U?{Cv;۞ :I^wMɤ!w xQieɨb ܌zqE__^ c_!启YIRZ<5FIH}q/<v5sxq-/KY=Y|$dy;LG١ڡ˅H KeWY[ ) *[X/A'x1oUGbmB/&5Fz@ҳUmE#92 >^o9f#jUzW PCzvFXʼcIJzj2 F`$pЂ߯ПƻPR,J*Awj^!uoz`(0m5U[Ii 
?Xڪ6)&5vGXJOD봄\)'Xo+& CSt$T8qy_aī : wECq["Ӄޫ2M/8N@G_ڜ ˉ@8 &DM j?(vm}Ro[xRCՑT _nF  0J! '1A@lkPsf2ZINQPb2b')z=OH琵X-4îuvv{|EG9n#~5*<;-$>K@Ah lbk=,anxx y [㝘닳g#gROæ<F0Wzl79&B^$^%_*QMbJDU֌{n%5ۛZ:$ph{r&GDho8 e> stream xZo[9q"EIb. f7ӃۼvIqe?i9~ai>QP%LN>UƧeNP\u5&$LaJ%G*6Z0m Xd8H  UǑmHl +)V45Sfm&`D*!D "s5 '@ .F&"dv NkB^ d aB!>Dd+T'ӯ)M?:,fC!8Q]1d;$1@MPW]"sppK1fex<9Q8CmfiOaeFa6Gq(QBǔeHp+s 46 kyUiWMfHda$*U DU t#P7З }0e1l,-qD|Z߇n9]v^J|$TG-(^{qlp(Ήջ!WK ptquJc.%3#.9G/e)ViN k6ر'0aږ4bӎ2kFGݓψ.$de(.W ɴ!3Ǒ;=k MA;6I=&Tw#_vm=ݨ! }Y"\#D!!#MIqiKv3 )D,PmtOxN_F<ǃ"^b> stream xڽXmo6_!t(*5KQ[ hv0 l"e*Oxw|EEN݇Dgx{;ػ뫋oPػ*cKbxW[⦅Շ_Ӊf!Q:]"nz;)pt\D.2j։ 6Y7ZCZևRpe=fP;-BF~!Mb;HB: Vk`׶7;k-Q%:&"( U-Y˾:7];V9J;T8A(vҵ#jM"oVC?Hٴfa8Bi2Fn~HZaE(NcA81:onė5٠ɐL?L$cBWA߱mÝ=QfG! LQb6+,ޔɍEkVln̂$L%6Itt ^D ^':.0;&#P׬$`y);X]2VgC&iZu {/ѠвD.Q0orhM/9LW AN΋ ELc;hl: L^?*-TDh+0NNnٵa1f0"SWR;>oYT*4 Vӡb_n67NWNRVkwLb|R-9jm`('&cV۪٭O * nۡY,q&JHPio)E8wl~UvaQu]υn.qZ&GHFDU wWi`kgf**R²BRpoO(ȑjC;S3 Z `t#oI-a G74e!c sx?D27ZNRk\޸GF|gNoKY~t;LNl1+>xŜ:1-]+넬VuVq(++.`YÎm Yv^.Mc]6e=Գ\뇜S.$./*f_ËaLDzT!\m^> saMgC| <){뗘1*uKiJ2ۭB=Ɂ+m,ˈAaЇ0Ikmf|L4jkp?q.UTH1ULJv$Zv!){HZ}7zEHm ح, o,GܽHd N nBA D bR an s{i^7uwmن$RH'ڄ.c-LT ^$bڸи/Y:OXCd!9QXPm5+#-5XZY@Mob-'TA"^^_-| 0KR @ ]v^~ s/qL0BA8lr.Ox ll`5FTv'Ԭ/̧ip>^v/ endstream endobj 1488 0 obj << /Length 1662 /Filter /FlateDecode >> stream xڭX[o6~ϯ0:)Qbba:dM;BeI&;!eQr"!Eܾsn~9{:{&g IBV<, =}6[mfrhW^N%Q W1W[<9,%YuUMV}~Әbdܛi9+v8ٶZ])qjǏ<'O=J,(ol4߂z^>`/탓ƹ:-6Ʉ|7> 8蒐ȋPO x=cz=# IGvQHt8Ǖ8V ~MqhlA_K:onR>gsY:mPGh$;GŸR$lrnY6 hR$ ܔ$pOO+ <",1y<[p>7`_Zch/E#EAS_ B. 
b_dղy2!e 48m„%$b ϊK+yG.Tad X}B6^5m{Dw`&4|J ~cQ:xR1Զq4'ANk0 (}}#5e?E.owpi}STڕբrPm5[OzyyuPfoΎ#ǂ}s6vtz !/kC{W+3BwW4%tg_]wR endstream endobj 1523 0 obj << /Length 2364 /Filter /FlateDecode >> stream xZm۸BHq%*@ \pšEE.6m+K>Qfw^h^fb?>3Cml07Wo>4HI8Y4 C"8 nV癌͇Xvr0I٣^pU訿_86oYT{0};;[oV.IlTV-$I¸ݰϗwȐ9y\ Kq0!IfjmO?ɦݎu^4综A8oB;;Җ`crETpB'Yʕ(1ͺE_]5m3Hy|٥3|$#F8uxH!11}̬;|-(z+bTG|H>$%RP&xjp#EB, 㠭>v^=TҴxO@[=/I%}g{?{#F$Isʡ@Fr=У AA\U hسݤ4'%Bړ_a%0Kn{1xQP6o)ڮj[ {EcTPݼگ^7t>޶trtw3xWCX`MƗQfg]l5+b'p\3kǰ xҝgCߙ;J ?zd@֪9ԥPuq{^KMZfܝur>\fdACQmVqLBBR%~dng4=zy})~tOEq팡QΝW;? 19=:<pjrT 8uhG\Жhy(ܴvU6˄$)lS#U!\ջ:f1FHijNYJ F") O7Eϫ:^coYy|> stream xZ[~_ᾴ`o[fLv,ӶHN ͚{2IXXHw!'X<ܼƋ$!  E€Hŧe ۟LE@IVvZ)7 YbåXMZB̓-lJxem[f:J+]+M"W?UE{(|jkQJ/n\J`KrQ||erܮ(%u.PI1SSb#b팀鏍OOG b0ks)4>ؼϡcF@д-5̀Tw]Rj= wX=j+El7炈kz {'5ZO]hvJ,,’qaKqEl 7M>ePU[:r<4fh\ 'LIHsrnL֦}PsjK^j^S٦;$Ad:s! ~}Zj HxԳ"ǐgtq~6CYejghzp}lg9N\p켎;1pɾf]~nX 2w4 2`BQL`BҐe9|Γ: P/cB %LL`$/T whVYړkjmҠTOLOj-*Hwka,ݱreʱJs{,DSj:?fSg"G:&an.LjW``Y&i[w5˪2mvC^+sԢ1j&K0BȱlaV[v :XVmLCFx|2ZRdGf khDӕ9xm]f% :f(!1S^NӅN)3wa6/&QȦw3]|oAgQ( |Ͱfzސ=:<Ox#B#qdV&qQ#D?FK!3w158>^Qs0Ǖ*խFrw#s b+ߜ\A˱a#iqze/nQZqy|Gy,Nm\36Zu'H}/ ӯ[ z'㿫,}I'BG^kN'yMT SҖ7mΏ=H\8fV൚ˆ5LLzR~5C ۣɜ?p'ͧu{v́\2\W`D j @A|wto>AE^oPS4B0 endstream endobj 1601 0 obj << /Length 1848 /Filter /FlateDecode >> stream xڽXo6&5+t[[`@QsZcT$+WE:{Nf/\z:9ɓ Y $E$ź\|l篓l3rY쑼}W~'W8$I,VADssēGEfyEWF"ʊpӊp]Z~cD8'i/ޟFUk|<nH$A"wj[yoy X[M*vuoj/P+HS"`ؑTO``X]vX8HS kFtddmmmgXUI'Ӈ7ĠMOyEv[> 蓄OO!ŪrpoHd6j!4 2jy2ut"˴Hh59?RK{7]l{<)+ќ)wS[o9̀+o9J_eIQStgnYH"iC<ڶb zI%Z^QJVX 0-@(Zb{Cȉm[t8RVH<Ie{Rט1}'!n`tF2.R(%y*iͬ/DVbڈ,޶U;cҧN7XLjVMiZU%A=^l?(g!fW6sİXOaI3O o)IAǰ ;w Z (km E`'Н!dPW>ɢΛ$8C"|ה,UN XonW;ڽG\ZY Fi~L`QX=cLcG0IBuH6 PBt؛[HȎ}# a$ 2JrQQ"Hg}DCzm!!vSYQڇHB9egj[n"ٿ'<1Cqd9GJ4pic[[9ЊHHe;ZA{(V^-!8t?;{'0IB{TPUy\̕sRo_+޻"vGW]O'*~IEn'ד-~\ KJ)B-00G4ih9p 7N"bښ7J PxkO7ҍۮ &fq&#O C<}x|4%a؃$ {pW\hXi;ga0;qc%'zrڧ߽NS3P Qc |F 'B.kwtrΊcr"V6OZq୪>@kAa {16`I׋:4Mϻ[0/'ݣFĵ n#2H{6ug ʁeJL=+++} {іqpl$tqvߋ\+ 7mg!n nPz. 
> stream x[Ko9W8sHVA0<vA:9hl"K$ώ~E+Z!cXUd<'gM zM6$xF6BJٙGbRp&`8F4$EiX_JƻPGXᒜ~-+?/ُ<A!(N^-"\+:,x80Lp.d%Qk@!zuA+N q:^_ eI#p;>(?Q@\̮#â3/RT-R(#< fN"J@TU#եTe*lIQgmFCVMPI1 0RS` ٙ:tL&IocJ>r"|jH0 E(Lk\Lf5*grTlNO%̄`rufރRo,cLeSRSJG瀃|Ո1m0TF($KsjNU.U]\EV0TKTמoOG_ƿ3r?U lUG%[C7Ƽ~mo0A7UaE/X. ԟ$ %.=B,_Zn̙3~?cc ~j: @fNb4~;]/oV6{^&?-0gEĻߛDm\,`z +hMly4i7|:DŽ&Kb*;UK@c;3@=߽\tooUBWJ]Jd5>}Ѝ:b <5RaFnԈ56D? 6ɋ|{` ab! Gasό EFHJ(j^„&őd#mAɪG sS:z</ 7ɦǐ3=jq Qx (} ($IPS9 e}s}=Y=B![kԹ6 .Ts\,kM3AHfQKC"Ugc@ JZ_"[wY\7_R4˂t(фL;uG3َl0*>C qM 6{ z:=j)j8(a r&njz3Y!_ԀdT 8s ltAoP0*!]c 6ۭżzt7>|+NXM;UH ^flaoԣd}{@uX؏WmAG{\*3%WS= Xw ]B!㑆tc-t sl~jFK#1u_ԟV_Z^Ŷ zVR UjNjoq{[ɭVsr9qƙgnqYgiqYgiqYoUt7!o+5CD|Uguի6 ^,7UnjҨ6QDŽcw+?u=G#~6@c D̰8}-gWNB3@YI2̣6#.fl1D ӝxj9 ra/^P5/;SA | fٌ{[ӼԅhK:RkZuv O>8VËSub)QiQk *1;_M9PKCf ]@.F{ESu/k?~)Yid.QO< ř{pW>-a=hd1qs94 !sN[=sZ n9b?@D8R|^-/*3ᑤYdq<t` endstream endobj 1618 0 obj << /Length 1175 /Filter /FlateDecode >> stream xڭk6qd[(uqc ($b,SM7BГLGz]r'a2.FR(M(Q8GF?SF̱jвPgp'yp$戰à+9ZnCx4Yqȃڕ? ].g¨J+Y kCƓ=ZFidh<ƵLoo:+'s4 #P wlTgkdk+jkvʮQNݟ$&)OĤQ)i7adS0Gm)4 g`k!o۪!7coϰrB||S>(k[Ϥ4! Tsf<ܣX5C3f=EyBbaH~yb67ZQi8f

FFFDxsNH( Ia=mmߛH]GTF 1/Ī"l^/c0VkeH^PM|89'g*q0XoK(;/eVu͘Pכ#v ^ }҈_ЎXu=gzk fDSҵ`>z UU5oiہ^˝7%OSDÝbHWq ѡL ,S7j$5@CavS W7ko'!e^HrU7XMkx;̳χ-uA}cZ6~H>\[H$J++/<~jnG˯R}>pL8c zl,|w|0eQ|^#ݻ endstream endobj 1623 0 obj << /Length 1152 /Filter /FlateDecode >> stream xڵW[o6~ϯp[tE$ -Kh!m",K$ ?^ml9[_CH}y?y|vqKA b`<  ـ3(lg/nY$Q VZ :!lGbvo#D! C?N#=Y큻1͸7G/xtpmn, >B=$?"&390Ği:LH𤁦>Ry)t6RQA)_o|m~0b?F*@WH^?= v&sܘ+ߴ1֘(5+aI>W1qu9NCNsQVxs;usoz}_gAn. endstream endobj 1634 0 obj << /Length 1367 /Filter /FlateDecode >> stream xXmo6_hg5WQ, kh>_l!\0gɡ'=$F1)ƇisފIo$v&T@R"ϓEE_@U'.A0es}+zFa ^xUtl%=uҹ|6 8L؇Mɽ]2ZMt6NI66JH6()-Ԡ=$/9UDZ%⒖aB|Jɳ$W: $v2tO|=xɩ`ޝo` LIH 4"/ v~Β4G9WDH kmy8/Ot^PF]~R O4 8|6 +\8 Xp }h݆e HܷajuTp>&fuUluل@-F_GI8b3{K1xBtq ɍw6S/Ƃf!xTA]Q~{,16TPLIL=29ųN4>sSqv5P!apiteAitJMnnTo ݵ41I$+ ]nUj0C!g+qNhqϢ*su,7=pۇs)nw*k-ɊR榋rpk<@1XD$;…;}ܧ1QQ.o_^C=t,jz^=BnXJ`_|̏7Q<xc`,==|#> j~:N 7!8?]I7n6 6Tuaأ}~oΤ`a>)ąja%X!L8(,[q8{7=:[p3LzP3^ܛoz!Bp.ڷ6.ah"@>CPVBŽUUEaIU_Ӣ8I+bMҥN 8tk~M8*hLYgku%$P Uq`ސf3`d[3(՛tvoyhf,0  endstream endobj 1647 0 obj << /Length 912 /Filter /FlateDecode >> stream xڵVK6ϯ@cj%]i'RQf`Ql6>eaY&=a>29:9ΜdI8a0J]BIξt>Y} 0MHHD/4' ?i!.#0[>>Gti @Y@u y1}y1~ ! fd Ʈ03oflNQYW.#7o:.Ni>DǨ{[)5ZM@ڇ .!YL|cF+*sU2W9!o$}Z׵^[+uڛ3NP),D&Z 7v콰酔p%UyAV|ԣH %۔^^{L_ǷYF(MmO\Jl8`EbT$4)|Q=*v6ä%K150%[?Kr_Yt2qDv0wl2ޡjDxԅ\3^ANRuܶw-F l3 PbxbtÀ%f.[J7>ZN NޛIfi~%<2?mpM8n_NbL"'pV1 gF #|/;Pk)ڭsSnמM.}~0U'~d(m5`§iy?Bq[Sd\YQ%Fcl_'_&4>5n}S>6Ҳ"঍g긾HkAkey7LǯfP|?  
endstream endobj 1657 0 obj << /Length 1183 /Filter /FlateDecode >> stream xWKo6W(ÇHII@7)YE*%Ǜ%RF=g833C`ٯK d ` ,H`OaWk1t}G3"JʈAv1{ tRc,XW/0SXYhH)aQt4wQB'^%x ,E.NSyLٯ}W[K;QHKMWESwƆw,=60F.0n-Vm1@i$a )T?/Q 8dJ+$H^IXԹ#~cՎ,-muol4fL٬ẠcoyYt-D)<1En׹ZD ꌺӑ[Wـ;c BOZ^"zaz_;Ǝ;QB:qJ̝+g};2 dia`[1=r bvLJNB%0&lEn6R3Xvnv)#P"g}g\:^ Hp6, *]톺˞$e;Z/i*}GVZwsu($]WT%xjx8g9[|U `sQ$Ǎg)"y=[ AF'9ʴqr g'#;2ߌ!@hF)lPVN@UQtA1NPn~vw[p 1@Y/F(&v8#81m;ƥd:VU\Xh`1#RMn o[|otIsT((!`t;x?Z):!_HvSُ0B-nle: eY2>]ή[m8r \2gZ= o<ʶ ]o^ކ&?~'x?r _~[@ >XhRnToTn_.VW;Kjj#y$YK /δ5BO]h1nI_Vkݝ?^cMe;BjW9f佞ӆ5Az@!]wWP :_ɜax:Z.t ^Af endstream endobj 1671 0 obj << /Length 2678 /Filter /FlateDecode >> stream xY[۸~ϯ𣼈QuEM)HnE [,_e'@@4E37 a~zg/^*aI腫.~B[=dN}ۋa<)؃yw=sۈ(n;sXQȒak8"&( \4z2Y]ow?9+P*3?YڦS|-]EQh~yS:HEZ{H?>c;-:]Zm1rhM8M/gT# j6y> 3bFZNHY?{_$LU5[owcQ, R`9uMJ걺a;- zy$îB$lf2OۣN[lW^ғ&uR3U Yٌ8\77'k=lncН@f&o6>"hWV' H4baƂو=) _^Ki낢vi<`!7ͳ󦑅ܵ7gQ⏝Om^`u*M]ڈ*SPZ!z*Ww1&$tV,<0Ug~8d2,(@Tt"nT+'Hq ziOxۑ"}!#9Ms N5)˼\ >!~4g24qN%O(谞X"j&WDA"/6dn8@ s`\fQO r] $ :C[[I4di`(rrU, zSOJ˃:>"QbILVUo +̪ŽEFMMD/4ѵMoB~;^a qܝcXcQ<)]ꔏ2?;,{N/j&X|O%vEΨTt:c:G^]_&(I"5tABvдz,]7 RӌEk,EXMijRMTތ,"iNt'=M7,Z}N "T,, A8F^e*,b畕8C~M9$ <7W*?t_b,80.fRN+jMjA" S'ڣ)n.9u cw}Y:m*@|gM+L7߁&X]l}  㛱+`|IQգ+5~T\AoN# @.>p"/?Z,yUehBD[1N<*|94D'Og@}yL&l] P<w/˾* ʞ5`Ж [ ]XHHQat \yBrԸej2@$> OX$钖Ӏ}i$k7fYD0rdZ>?.ei XEZyquLBHE+d endstream endobj 1684 0 obj << /Length 1315 /Filter /FlateDecode >> stream xڽX[k8~,4/]v`e<,EXmeemYcin>X;WŞ'ߋac֭\ML-22W*%mdʌPA%$S PG؏)Ҳ(z/ ĴV[Z0LM\8P;NdH2dM8L \Fܶ*0}V24D7=yQ(/Qx10Fk @^ɒV0(J1z c`X0CwfOD|UOE.8x%Uj-[ĖPHn?nI5 ;FkŜjMnp}OW_wޭh!z j'j9Yq\:O1&IŹj/ E )Ki}aǺx}%Ɨu b]GPǺ1mF ^SOs`"\Nrӑ(݌"K%pK>A-u#Cq[ tkH)wIKi`X8l8&_ժ"kM5IS=Utp..cp.R^ lUYo'FgZ hzgMnT?㣒Lf]dviM9r G\4D3ʋCKi>qڞz9BPf#x#'TI"h7t˱Ji3k%dOpQ Mz piAW]L ]t[ӱV]=HO_oMjS\b[ip̌(U &SE/ ˥wBKj&oe^Qtɺe(PLw=C֘vJ6N_;4UYoo7G\9;FI'B׹vro 5'H_F="ۚ&M awֻqQkmI&MDg3§Ly0^Сz5Ϝ5نp+Y7_F^ endstream endobj 1692 0 obj << /Length 1413 /Filter /FlateDecode >> stream xڵWY6~6*kHE[ =h&b %V+KZZg;dvMacf8Ǐx6ν IA($@qHedvmIi(;ZFHs %= 7QeDJme e||eZ -z 
|8B]ѫkzFla;bnB7RFH|?>o2*- F93cE],I C*@GoUY?́_U~HRxϜ|nmWrL-t+n2:qcw[FK%ՌAº2 :VTC\to˪{1NfubofQ5m\qTD)gpߔ47'y?8+LZW#߾L~U:F)FoZ,z 'MS9;w Gt(te_9J`2|UD'4#6P kڹ0FY2i/G8˝mJC+$ QNi:0v5n5s4>ڦE~[ۓZڕBrڈحWKdZ4hc `p"%E<7FUEѡ_]("'an #lu#+a#K  Ժ;t;uyTJfԵTRA܎g[&jr@rb[ݰ;/ʮbuBa\~)>p4yrFK2[i *SBfGu+*[U +]] x dmET04~ U2^翻&Ǩ|*Z}WaK#( GGQ-6UYfoV_vg~6 )Q+֠VEW(arp_! %JىonuOCś)c8GI'"ƹ\ ג]c8>,F!C p_,˲Rr/Ȇ~Ba\ 3)@gisWVLSZ(Lp vD; endstream endobj 1703 0 obj << /Length 1600 /Filter /FlateDecode >> stream xڵX[o6~ϯQwCP (hHGqwxl)KLR|sm<{wqzd^$Hþ(Gqx:36|9+1KVy.=kiA;i[#vii`sϞ:ir|'wbBU,H2 0Q'a+.ڃ%on>ͯ熮p@oR(,32 Y{  >NZ5Cn<m R۽46}N&cW? 9fko0H%J)VCsc-G] B hQVHg5w^x-8+c`c֑7-d 1<..)cZ8pYG/6[k|VR8jXKK{z1aITHޒҵ8@aMg{t}EOحA'.sv7[T$u9:Ctb׌L9l, ҂Q ny Dn{/l3;M-dIwY&V*C~F3 "qj[AY$(DQ={``lDR4۱IO^|/;K; +o\=7N+_}<=ސ6^$t7Q{wb:YsTCC;#-`p-@y?;}ZX!.μ%Q~a(D?GRGĔBbyY1#_R e>m?a<ת&PR vB@9of2Y\Qx*KF}`wcYpطiR[jߣ*P AtʳWZ/$mr0t|F+yb}lV-]Y3l%Ӟpm4 [p٩)p9[ qA4sݹA XomS]Dl΅cSlOHGKYpʝ`d5[S3 Mf:@ͺD] 3]Ar\äD/%8> stream xZ]o[} З䅗p8da,-8)6VlamG=Cr$Yu]Kg\qs\0m!mDC&*LַŤ'xbrQLQe)iwY>|R%"+VGT D2&Pk_1E LŒCڥ ]VR].er G0E+Z7E껪2pt|||p85g_oG͏EaO}}8jNSG+lEu9GoK{gM4}ysuh/o~yu dqNV;AoB~lOgv:I.{ѓӹOM^YlFa\'vfMrn7i pZUt毝$uF4^X^?-5ƣ}X4ޟS9{jQuYW{NFypԾ/GJls,;4ccWM0=`YG4KlK(bε+`Hɴ7N*ӫz؝^­QaĎO,ĎK~1qyv!D~^kN1~] hVg imy\]2>xP: YDx^/8~Q: `up=폻AF0fM{AL8+{7@hC|dGAѭ8tGg zp%lZ``"+e*&x0 D{ ‹ iHTv逨, υaiu(gXai/ ?l-#\v_{XTL݆bAs",Hr|u| U\b]͚Ү ps\WwØ8ٓomPXȴ@jf$W'>FB%\YS8 ȲwF ũ8/NI{I{I{I[8T8 6S ͽ=S`5Z 2% zDSu S !bk-8/P zoY;BTQ! 
/m8V<\[h(ouB/R}TII3Iî.b[RU\>8g:68_rHG خz7zMjћwh|lqs5N4Ǿ=4o!J{eYVV*ʮH4FkGi Rvt`-42 NccR\-eG/e endstream endobj 1716 0 obj << /Length 1492 /Filter /FlateDecode >> stream xڽWێ6}W)HL+En"HѬ ZmEr(YwmS&3:[:?N~XNA$$ YnF)ЉBJuk4g?/x1 p OY#4Zb'3w#MDn;Sl4n@jƜԙ38 Zb(B%Pb :,/ |H|fUg0nHW,AmmJƭƃZt4Y9PVֵx*% |FߧLZw\Y™(FYg+.?oU UFRL+CN:r/ :/+sU뒷J (.Pi6o<\ +1cS-d&4+Qڸ3k->FjCI)ʭS9lu5_UZG#2?XKƳ>\*ur)tIZ)Rw^.<&BO|t[% 7bț!Ȅ2!K?/]RP^LU|:$*3O"K V§C%„ )U7mv튤UPQF,u~U^4Ӆ)0 Vg>i0|@Eɛ}ϫO!tkBwm  fȜ|33#fWqo#^ ^q,kv= t$jgdЙ*m&kHhJ"pSoǿ囷W:{4O#JA\4b]hm4Apϭ^PSr"ܴ WseWiWrdt%:u8 ~2kUq!'V1?q}@ Pv/y.3x+z_w1&Wf[$dMɻY!`/a-uH;7_}5q]z/#wt%I̺vp1MTwV;)rikܮmi\{ȸnG77MnU=-yqK]3K.D${aS) u+q` ^qGiHHn=hBV)T+K{ YtcfP!;$A^e%7F o%DZة'9U|Զ@lvFz7) K 0B@w`PjVjU*)vvy|@-h>1RA) FHa88~G `C%UpashamaXب7@)| К11}x5WqZ\=XWϨ8 g|5QPB9xkjc~B|??36|+xX #tx̿~Uwd9;s׃rwagC ry endstream endobj 1749 0 obj << /Length 2315 /Filter /FlateDecode >> stream xڭYY۸~PI xM9YϖrړrA"$qM*ܱק iF_/~znd4n8g~-∳lqQֹV_~~uXnffs䵈W_ۣ*=Z= PTKX>,>4X',2]-q8ZykJW%u}!WlV눧K7Bgwj|ɞpvy, CzWWZͩ8Ԯ7k|Pe-aje3tk - 9żn`/J oΏqW|ӷ,.o/υ&k G' -tb,ZAbhK"=:M@羮uL):~muC]imj2x,J]0-x[4.a z<' p5ABu>oγ [$XޢF M3%\G}Cf՗Y]>R7/_;H8%!$N.Ε 2pf%@=h{²8SKDħ6j4p!gĮ$MA]>U|F3zA !~ hzlrcN5)] MN{?(R_dxaOgxbܟ)~Mހo4;h>S,iAn,Kd ܩK$ 0쪡>ms2~FҪyFvp-FvpG_=% so;;9Q r%x40Ʀr窵R 5<'!c'[ mݳ-1`K˰0U.YW5c$KqFG- 7I vp'#^!isMNWp9{qqM 19J'U*:qf8}Za=j"()]5~!x {t鱜%Y)jUÓ 6 PhCL 7y)m8s.6ny D=}O$ME5 v噺ƆgAD{x:q@|!!4uE6g귤dAtK2ǻC 9 z~ոc~4)ZPHOD?U?cӤbf d)IۘM@<_F hĒCLtqUL^Jp[juz$anޡpmUw/]8!zk<84>E>1\d[][S˚ ;)To(0BwҁQr^(> stream xڽXo6PLbVOJ @;݆2#юV= J~GIǵӢG{'ˉ=ysj~uObsO拉c$6 uݡ0K!HZetKNOuo ҹ"k>4.fggOp}ZɤgdMc “ aO0wV=OpQFhd dPGiN8$LP3[u~aFRO,JRTfuTG~`"*Kh(Gmh% :kyS!UB%+pXJcM@b>H*dP;1D]9 4BCXqn.a|[J$cAZ%m/IYfi+r"8Ё(Tc 5u"kiV@Z^<R2u'i_s/Mho.6zmݥic*tH(J} @Y0=*p䥮O?ı\kőՠL>f%=v)w.}ְh 7spIpnTiG5RجOD}Q|dH~kC7 C8 q],ЅzozàR̸0$'^M%~Ϋ'b3]zD_j-jW=6ǁMt0'^m -, EElʆm،t'~ď&xe4Ho߻pDA. 
Gp[} Fp3AI qkQt9e3C *Cѓ2 on͓6ϱkhз>j-ЪLoճ\%mײ!ڇ-{,&E2QBpm :k8\קyNB/hCzu˫ˋ1t7zoC&N}hl!y: :c }ABTy[dx7=M4'2Z9Z~_m[Wܳ~M̜YxAp߼@ \.7Bag}Iu#B|!˽.3v\_$*FW.G> r[)e}FUы zv FgU~O(8ag}R!ׯ4M$\)>:ub]L- endstream endobj 1788 0 obj << /Length 1338 /Filter /FlateDecode >> stream xWYo6~X*hlC6i_V[,َmEs|3g.KPлƈqFܻrqwOF, Q$L ;Wb-(6> 鋅17VLF;(E {ZE8i%>bG( kK =lmx+TʾuSuR lQD F w)TW_9<,NF09rCmKɞ=T>n,ڸd#;F蟛f9Quj_\P7Q_4f?fl3$b^VnﰗX{OFBP0ס*@ : Ǔ1&`T(I_9"m§om,̽\|T,.-@SgnMP [g/, y/;׵(Z莥NJ g|^ fw%WdʒD0@ Rdi})ZtM]n-W4k6꣆KBX їؓ+vbʩI3Z> ́%}h8f?yj@ɩ⢓ .e_RBԹrzq.hH;3 7 l-7?3,B;d9` y -|Ԣ8d* CU-wYS>A=k& IYWg׋6-J"osBxE Zn!:L?ֳ@t ;W %* 3Xa]E"[gXLE']VbKFӀ=scaHzRa'ڶR? @vD1K >(]nDVhr R53쬭Vt0q,fm3('v7* E֩L(E- c?bo^E>qH5U2K?'& ^_JM?N endstream endobj 1798 0 obj << /Length 255 /Filter /FlateDecode >> stream xڝPN0sݡGM\ڛP*-{˖͢1ͤo޼[r-$$8 K)i*v{^liɈ+gt"ea5:o%Y|y')2 o/;qi=|\3F/ N 2\ax]d*kkFI'jV5S9)"=shGm;I^|NVƷGG=yPWʃɬ m瑩l> stream x[o6_!fFɇ LM In~%˦"SbDAQ~ͻ% 0t~~wu8}` "WZ~ۏ8 _ C@!uR'=rN "@( _e,e~B'TƢ#ZV?rq=q4-P|LZȈސfw9f&C.>Ş n7]"51;RP_'V &8Fun0@6wcaot5WeQU !jhCOc>jY.x101ڤI-ݱ@ueU__GVQcVe⬑i&""QndoKOr%: ¾oY4~\Ƹ ChΎ}i—mSg.>Яaaa(:( qS{i$˞~ni2C^M Ceqq;ern=3͠ΡLӄy@Q/0qf<j陣Bښ[}=FgDa@q@!7u|P)u`mZ1M>/sgfxsɥ3鮝tR'?CτDW(TA S:r!]~CtipXh+67 ޭ \_K"&NjS6(|.StXKg3'_xwE*O.nF#BE4ձn#z@1vZ!NvH8B+/<ȰMUٮ*@:W 6SŮYXױ;"PW;纛َX]~q51b8)&$#{Z5N lp{Ԅֽ Ӗa="ziˠؾrWZE Es'%?Y{+ :&!+x ܐA- c"=(ǛW=*! endstream endobj 1712 0 obj << /Type /ObjStm /N 100 /First 982 /Length 2703 /Filter /FlateDecode >> stream xZo~_CF'-.%64H*|PR,T|n`y7w33Wb ITp$:rr3 A#;Ȋr![Fs\:h%̚R&Hu;:rU7J:)Gˬs%r)(TK%Hc1#uduv(#Grr9(햳ˑD7rl'.-\we YH߈+ sPgK/7/ rQjRZU~rg_HXTʡ:*YkrDUx$lҺ\uKjB5wTqѰڕ*UTt(kZnT> k(J0l@KM]1N`mdLN/ _ġ/K!qtI1q %8`aLer6g1F}yJ%Wp`R},PrׁjBX}22n$?6:!7|{ Ls7 #r/n7g|p a+]'8i҅OS]| ipCNNT}cbظg.c/He *&.ob~\-/_N7~^MݸT>~lNԪϟ fu9o^&.uu ~%z7oǭb ׮rc~GK .WWUGo? 
F ]Uc0W?= RuȽslٳ>r3[.?_l>< f7ry=lfWj//l1\M6?& ru_{MR3}7P칧{]afl>˩1z;'Jc=KHּF'JWӉұC~ϧ{u":8ud l rAA!gCΆ 9r6,ęd{m{S`' HudOUy_ԗn(:wj~'fw_ɔRfpjQL 2f_s=Hi5}\OS Uުۚ.5y\5ktepĞz݂K Ucpjy38zaaHSjBp"v2(;EQcF3?Zq`1=ILz[mQUX1 KzDc\JEfP@H7>ӫ΅ŇI 糙]\\&EnՂҽoh2[l`'U몒z qMS;lbLWn{ˢQ^cj#y-HcBъNZ5<=5Ъג^?G\`Va$ uGI\O2`miW@1E `H9F5K%Mc$&%SO( /XȧJG *V]l&1SOl L E+TO9R3ùd,ލ+^3"=:BlF{@H"Gy.NѪzǣ66cU8)(]z_N΄49/# S#s?i8Q`K~z~/ *{]y aתlQ+bz-*Hp0^},!ϻlS‡[OsGMKRĴ$ʓLKGwMKŻ/Kw?A٠ڠـm;l`ѐ  9r4hѐ!'CNlglglglglglglglglglglglglglglglgƜ nl 7[Ípckq1bŐɐɐɐɐɐɐɐɐɐɐ!WC[ߏc~\ r3f͐!7Cn r3dKLSĔِ-22222e E1A1A1A1A1A1A1A1AX 71"5 =Wv ~>1{PKY)Կqȥ uDHڣ~GEg~AZ oF{5h\J=L"˸"#~%I> stream xڽ[ˊ$Whio`nbl7eFDoZQ EP5Nǹ/4I!i"AAK r(e`=i|% u .\q?l^r|%x}~Z-d{R΀p%bMd 9Ud ]d e@عK(u5ձb3Jo%KJ\W,Dr: ՇJ 5gʹPKzP2Ψ JhyZsr~P pZ?[kc] = ;8:Kr[W/b{XCCR g&Mc%l,ŏT7 -R*Qz`_Rv|WkGRؗR;ުSҩ/? ;R>#|??9<~6|8sS$Bau!-ڐxby/QcewpochC6`}j4F)|Y#za%3`k_7 m/@Р҈!d]+Қ$ r1dfJfA\P ^߹.JYֺPd1S աnA(l ݓnȞ"0%LT|Ya¨@\_uAH'gM Sޤy^u0%cSǥ2!.(kqkW${VfЕLY_NKMӘeJgY i.\XrX#bd)8MaP I' %Eor?;x;#d g(O-a-'M| ¬79!H321zDnoujU~@@y)6$nDG_!Xe$DA,5Gu`LE8v3";fU0K';̲/l/Rfə Z`8iZ4v@lz} t9)A7O=t9Ccy!t&kp\F0d?Q/OcI,:~◀PT` 0- n=!]@ ,6)[e\.Np-:wIT[~("6Y.G20|t  `bb1$dU2xX9 d$$Qy3C1+H@ReڰlvH$ϛ7GbAbuOS} VI&^@6!yXNz2㵕#|<>@l))zA4R\fy}a (TP$،eʶ ʱF>ǰkʐ> %*K]f1*#NȺA [AAQ-͘t>nheN{&&m=ikɑ7K[_n^,?w C]J{UC. / Ev &vХck,^0UjrfJ4 -{pgmM9b\rc[d3!H)=s}fP`0 } .NWm1Ek ;& &Įsxhg ! 
endstream endobj 2076 0 obj << /Length 1914 /Filter /FlateDecode >> stream x\[o6~ϯc ̌(^:)+0 բlHrQH2EJE$ o;,P.7 ya`h (`e~_zkF^Ϫ֯i$IpΏ֌,_Ǐo>T;{3=_ɗ#[/b{i%pi3ɲZ^^?Ok[3"5{rxJ]_*Ra=G,Zd*IA"Z c4 ?^i"TkV da,CrNUBixi),_>UE/Ów 4S*(ø _X%~.Cd3m,s}6t/6l8RXdgpH|Pʤ#pp<0l2 q 7 "b0NfUO\-U2E#cCr?sp6JKB(?tJreΦ֘dYgX]V׆Ka2'4w 9=kӏku9f<:^陖A]]˕*= f{]U^nkL/޼J㦖CΗ{=풧|la">=Tb< 8D,*V9|2DT>J\9cqr\ڏ4uܶHp '8jM:)v$)S+=Шv>wJj.Y1 &ZuQ#w2<Cl֎pF˼~*ƁR@m4Ø͹[t4l$ ц [/RؗX!#j搬!]!fၑ8o{.}0 xD43; 3r_z;+09=c$?'K冤x pl4>0W`T`1\>d,۲y'|[EbP.>?ȥ?}ڕ[obV=s܂Tp![Em|0Ou\:Q|G'襁I=<2@l7=mx\& tśI=w1 iv#(Jg;bbw5]C$ӺB=24U}-5sW,СtF5А^АI!'\ Y)D?xr`nv svw5D "0}iqU I/vz'GA :fی-[g3ddzTޗ|![uob2YbR3{kZIn7"uݩZ`I16FK`]h)c9OɏbǢ#ѕܕ(gg.YoJleա&w_TZTVbô:m'M8ۊ* LJay*ãcy fQ.MĦ̢ PAli:pU/INa~tΏƉ) s+ CCMjNK endstream endobj 1925 0 obj << /Type /ObjStm /N 100 /First 1020 /Length 2639 /Filter /FlateDecode >> stream xڽ[ˮ߯2 %@H"hCא%k/ҭEOTs}xXSF)2JIE5IR=M:z\KjjjМ \K@zW,7I$-$5IIK2j 'c43U`6'Z5chs%#h'd2'3yu$; /PJ^Ydc=H@Ԓ2渑9:^R-oTi9\S斪[hC-@xRWSm$8` qPN9ޣjjmjl}jvnc{F-u@ש:R@˩_}4I&; Yl>,ZM\6>zSitHr(6uW: 1>{ p*>q!pM{ff5^<!X&&%J J W!b6)Ӷc_xv,}pGx@ " oٴ9cq91:,iD1v]4,,D̆%^xϏO.?Ӈ?]|e~x|V[[Ed \,&CYl(}^H7OMqc;ĐE+ ˶*8XxP".{$j>&X'$HD]GZ%" d\cEr9$a.~GŽ$QIBpH$`{EاPIocog "!@c-O] ?K$AY# PZY? ߡX?9Gveycz{M=\Ƽ?ICa<}]~_Ա_ûhwT:~?k5KUhn_ULP J(8"+Jd#وlD6"Fd#ىDv";Nd'ىDDDDDDDDDDDnDnDnDnDnDnDnDnDnDnDDDDDDDDDDDDDDDDDDDDWYB Qp B)Y,D" Bd!Y,DV"+Jd%YDV"+Fd#وlD6"Fd'ىDv";Nd'y]t%IӼ@F[% GF-ZKhߐ{4L&= @Zl˾hRQ蹚@ T稌фs^U8EcAχf~,-ڔV$ń7(ެQkY~rݵJpog%VUmI- yp[Z? *Z#u'K@$ /8!= VH͈]PIGl.OʭmGAḵGNqʱ"ZU8(Rjؽ <;('Fa9Z`pm 3zK[: VLDRP,,("%*Zelnk)zllR ("?%k)fdHHlā8,n >!mPDl~|¸qX3.ڎR7k%򑅌-h5wc]Ϊ`A ,-:f0Gw=-Υh ,ՏtJ!y4tF4sWd^$Q+u+QHH]g 8 ^cBqj,%`֐dz"c nKȮ\nꕊuK)Xz:Ћ)\vc5k) wJrWqPWriaMڑ{( c$$%*I#.]F2]~]Wq_":س_#tb#Qr>~JJyTXЁld;,u%z.] m1)ӗ-N݁ߛ1/VZmA ~pI $'7cT I4Pv 3=f`m6v9EܓHGYgv}'֖R*H<"'e n[J6عm45.Cvn~ݡ-Mۀw8X. 
;½qXC>?~W[g*XbG5R8Eܽ&";#8-n0D5}~˜a%!UI3af!I Pˁ F\&#;Pذg[?/4.uʬ$/.zRq n$r8>kӾ[@ZݬnP]woU$ E)SNj5)«m|+ؾ][1Փ6#ީLvԙyN;uGN=wN!q#5J#륺zA.#c6Y z/v >g; C9H%qs$n%q`UGcy e ;G VcŪyqkPXcanEh> endstream endobj 2272 0 obj << /Length 2158 /Filter /FlateDecode >> stream x\Io6W84q"m!hl%QGIn&ŢBSEz F o{]/~8dq}!L^z/_S{#N<ʹXj Y'lgue/~b =Xcڦ{/b3~s=actq ֒J!AXN$ aB5e,u3.tI[T-[.E| Dby:zГ,̣KM4izE|(7a[srbwz)uL,gd?/aczAp&a)ųiI -Ȯ؝مDM^#JM 0hwĹO"pDiоi;E-:dIQuE bxHb3 mb D0]ʙVc4]˻Y1>i֓yQލuTSYFbOH뢴fu0WrG@%[W1801m7mU82js8Omq eE祪䴲N8-w y 2qםx9AZ *`=b{0*aW𠽸M_ldtz-Xm>+u]9' ̈y 䚦Q~uu&msL(-M"[KbdXw {t0P]Z =l&r>hv~Mp_xj^s2kYsS##^]55RSDrjA&M--%39" #i<.Se-hGo`[ 拨泡Q:vj^$`;kX$IaJC_O7K_ogT~l4 Y.ކ¼漥5S;8;㨃oiCV(CҨZ]ܷ :rz4>|H΄z jHjSi=]ncMw8pr{o>Ȓ,U@#C[0@4n"e˪P;xP? ?g0 M<YY QacȵumqOhbxχ 5Wqk>$\|+FJ{Xjﰔyi9ȲmWRCR̃͑9w/ αHx+r9mղ[:+Lqܟqs׺BF4wB.gJ?<3ȥ׽i\x:I _J:cPJbrӌXFNS%9"N q9Ki7I\Ǹur CMXI28 "> stream xڽ[ˊW͈|!`ia[h!k3xD:u W58@=y*2ND4I)h %h-ި!J^PkmxMaQ xKȰ;C6: e8Z4]@Z^d*|d-/>wsE.,hK?72!g[[=gCklKw ܭc nG1 &OK05jܝ`E>| 1@8Z1Jv/,_ _ %`.ʸ~:BSjvLEU 10u>_P[]PGr3Tg_{h:~q[JuK6 q jVh-a [;/=MCw/l,}{vJK_8H SnK_М;CDp&);FKޅtgI9w#^;`47 Fc"4 -Ge0h[<S&F-71ZkE#<0S0Z..,<8ՕGlK6@ܽ0_3 fzvo]|Ǘ?w~.!J?^t,]~%"j%6x`YKZݾ /^˛ppy~ˇG+|s?5֜cacE5ZtL$1`V7FtG9&aHh!"Ls9N#ZrhTfH\<ɠ4ȁ8$1U HJpK)j.;$t%4aAIdC"tD*dBSL'*+ҠP$yd|"I8|qlVq3I(bc#18jcclTR&<Ugiވ( 2bGE<^*23k'ӴC÷@8A%f}"/1l/I4C$ԑϙ F̍1I6?1>XW$LDc ufE]J!H=-ͬ/ 08z*җЈ|N`z5hqxbpk9)H~* Vxԩ{+/g,PTI__3w <3;I :/MKy9qg-:2CE̲rR-q88j ^xkLC`/2>Jc9f7琧JSq_扽7NDٸi(Qhxm97쩆XU|[UTL!wh~ꧽ7vh6W8p[GM endstream endobj 2274 0 obj << /Type /ObjStm /N 100 /First 1014 /Length 2205 /Filter /FlateDecode >> stream xڽ[˪$߯J4clW ^c,o:][*PYZ6RTD)ã|ʀCFʈVNc{Ne4UBʀr <( 9/ Ԓb=a14zkL 7aZ/k.M"dhtsaͧ{saP|JA`İ!z! 
-94(~NHgݡo8 iҦ?\H#YQ42LӰ:\_9cyӧ_߾ק/O |O}y{ǿ)?HjQtI3צyߗ\W~_|-AR34jVj>:̽u F 100ģAjzD'`[BFMfPenu!/Rr0 H0x4yBȿp8%k A K }8R'!+1Q)er>YN,( N@J$(AWE8qP$hV fn5,NT+< P%_ EB:@8< bQ0TN^$$끃* zA5O-̰2n D6@NA@j _ĆAň~n%K c}1l Rt*6QjF&-{'L&vSR8 JuA#fq`%Cb b%8g6/*Brs ܦlX2[qҏ$\ \Ifkc'u 5xy)"&^Etl]1:AO-HtPkvOĦou6YLfMhēaw~bև.Ar80lq!և/AK*'B V#qdx`{O=׍M֤İ]F/A_p]Owѿ01;,K"FMKbEbnG z%MR. V+EZ8Uc cge?;XY+0H DPN`ȓg}˕|&)%ce귄lXb'.ư2dgL`م_XyxelkPSK 9A̢Y. ytW\W xY@, ه h.J(BJpW V6h:. "y] x=2(l< xNSQ>;xf~53(Ƥ헁Fa =@5m\m<1(H%7UPuVHeP>J#-!X ;Z5owїvhJUŨzG&G@akwcHVO bf (O!M %;}pLzH3=^Mhd&$nTr]M R8uR ^-{]XY 3AlEry)8퓯/Cxޫ͛l0=d+I%\M)BFTEqE_bxE=~%W;zzظ%\[=~/[=~ %=喽xricO^-y]cb+.1/ [u biG{L<ϫW ЭN3ԡ^]/XwK@n  dL_PU e$3J큓 E+C(o!.g._e^U endstream endobj 2313 0 obj << /Length 824 /Filter /FlateDecode >> stream xXKo@+|Lc$j=T*2I4, R*;7G.G泥 a"2^EcĸH0Mѷ駳 w2r_X?xgxaLUVƫՃj5 5L2ĈڮYdUZ,qdڱDF K6aQ^G\oc* e`B#\ #&*8t`boAh1i1n1+B3.G L*o-Hj֕3%A )yM0X`|";^@dil\ؕ]X#eDWX4|Os(rq)'ܮWꆋ!z @](ZרA4E ԕA j.΋ %u>dqZV%9{ Ǥ/93PɁ1Ӥ+VfhJO7:xDUzg:(Ӧi` !5%`ҼUʛ`4si[I2ETC5TK6$C@uK `.\pG']Yh+Air `?dm) HeӰ7 ý0ꁛ>h6nQv1 7hZk݁+ND;e[cv.Whqcs35p#y&~7,_R064KB,)s̽3K$.=O{ ;5+OR5/w endstream endobj 2275 0 obj << /Type /ObjStm /N 100 /First 1015 /Length 2411 /Filter /FlateDecode >> stream xڽ[M1pX$d&@tH؃"/!ƮZο{ftH6 @XWE*FQ\RAu(%k91R8'V8"$(;W)D <.:Q5'12'M9%ȵ'+#Ԍ#LcISFy L//K҈/p^r]y9T.}d"jW\@̟a\N Jr܇˵-D-p&!y٩UͩϫNyi D8GSPkP{Jt)J*\+ٸN5.QbYӪHI"FƝsA%Zk@Mxk@T8}o-V:\k=YYF؊3Vتx9a y0-fffgN}"3nm ެҒjRa`Q)&gCȳIp ds \&ޕV++ Zl~պeAX v̹0ZAP .T9.!%PR 'o %n*VW^];w?~黇ǟoBC=7?\~nr(^ |V-VyߺW;݃;v7\cETb>3l`QT RA|f)$"zAR+yj=Ծx^I!*:zCb@YS.eٯMpGGfZ+aKvL~B W׊/o+"No>vwzڝnV`:}>pן>?~|ٿORC,/߼5aePǠY/8i  mHnCrmH!نdmH!نdmHE2oA4y t 1hc0$˳9v/A" 0 _}Iׄ yz*|V/L,//Bf/VPI{`s@m bu+4 ]A ^^] @Bo, |FR`\Vf+z85Dx@E',χG+8L*|P.PD3-z+vϤӠ V^}Eۇ"@j9Ltl1-V|;,R= eۀ/P -{ X 73팡F^fDBeTq(뱑42S隹4P5J=Dix} k쁘9VPB f*172YJbgk$q6Z.Agk%9C?_6zF o>> endstream endobj 2317 0 obj << /Length 113 /Filter /FlateDecode >> stream x332V0PP06S02U01SH1*24 (Bes< ͸=\ %E\N \. ц \.  
33qzrrJi` endstream endobj 2315 0 obj << /Type /ObjStm /N 100 /First 888 /Length 659 /Filter /FlateDecode >> stream xj[A ) Li(%$.n$^$.> stream x-ɱA($ \vTSHB $:@\#Q_TQUE&MG-nu8M [Yð,ΐV]'v=WN;S3uz3x:cE_ endstream endobj 2336 0 obj << /Length1 3063 /Length2 22363 /Length3 0 /Length 23896 /Filter /FlateDecode >> stream xڜT]% ]=8 ];kp`}{7=\dUvmȉ酌 MlRFvNv@g5Zcf''Wp6L,lq4:,@gP @`f0srppXh 02X́&"vf܀TFԠ@&zP4 @ ho5q0G-Ƅ׃ šΆKWVQy/W$+PGZP8Aɘ؅nF&N&[cG,/ g#hCM1U @el+)3#5hb"v66&NLc #g-2C#k}0%䡣|/&xv{P=4{q")2H63Ǐt3%OHJ%aijU.*U?;jTn0e*)sxuWsE'{"KS<;'7j:Q \IqA bЏO7? X)%;~h!1H½Kh(2R).(&S.ٚ]9]J( 3Dc '(5t qR֣H`:Z'V8n#`0nf ~LH3X-ԁ WVv . ܂qtXmk=)Hu;mtx$YYE|F/#Wup޺JC?,o+CTa 9dG!H QyZ;&KK[D MaLyԂR9tc,>!^WrvtJq hai^}tM\3CB7w?Obތo$\gG ;qЩ)0hQܧXFR>dݽ'6ˤ9[(@aeQ6֊{A15nHP6oϜG/0; 9ejlǸRp ~/܏W6X7L]Lؽ!{< 1/& OWX.[o]⯒:]B-S\s*̽:r%]po,!g?H#a2t7Ul[bӿծ EטKE-"S%3r91rI3ԟfF_ׅRn\Aʙ~aj~iC܇B(Ԭk^Z:p9ѸA!~Ƀ`>n|UN[p$W!w|=lLL>lݵ:Џ5Tڅ&K\g߁#E}b&oJ,~ =!e:;TF85nGȬHEsrI]$%POMHD:,}Kg\T C}:ҩ8F=,ToQ2)uJa2몖H-xadG>EyNiİs'p~#8e6( ȑh05f\Jze0TGZ aNFGFRkVFJE׻-[ۙX"d[D4J?u)Ͻ2~ڝeĦ'cPtwֹ{T#CD޽՞0rIvMq412Bw(0#kS(&<4ȶn:'@Qs',0BgMG#kivr/F?ǓbԱUD`^[TV} AP$ 2?+Σ?n@XTs#xst#sYWi{"=/Y'P7F1Gd''Ӳ?>Gszq0i<!J_q3[(a1:-SX&]?ǽ!҂ޥrJR:Nx؟߇{$G~]T;UyFVЅԩp&ʗPS~x+HZfVVMfb^[=;n T =pIoz͕L̹K3I'&U{P[%XN4G fB](4B0BFXwKJbH!HȤk@޹IXZ=(Za|M N#$Nؾ"ɸ(32Fq|3ToεDBm^a qX :Z>|$3Oj N`8uP@?Klɯ;a@  ti'fz*&]HӞTtNǢj9dE-IWknǹw"ۤє&,qvc-Xtwf%/[ /N7dބy^;\C^,nI=1']b#m,^4yl~~8ЌV"!^a|Bybl܁'xy@kFWg,Cj^p# ummuzo0F) 2ڪjvp謵yH[ra)6e>jֆ[ݮQ/=z mW i%3WеO'z0aGL}m,YE.xYimFb n䯊ó\4bLw$cN@/N8c"bk1#/YztOQO9F<5_>݄!`hpnVN/8<%M] 嵖vU严=; -Abosj'ђTWȩQ;F`4'eoB$oeWԪV^+X&Rrw?>}Pjsu;/*`cC]>bfv]8V'B'7֩*<q˼{Fb2`=JYDo^v.y2{ $"rRNuPwK7!틒Q'huI0jwPknhB/Ey"[cΕar H9l?/pϫCPFgG$秭kl4ϙKOyZ8>T (%/B? qڅz9-d=Q25ɾºFp1buuѭHMT 0'b޿І3\yA-Q`et1]5zƾoS[WpWu9O C҆]z#GSlGH='\<e~p8$Wȭo ~̳Bn&GDCkG<%%pڂsၞ;5anALH@qq+v>B`c %!)%H mIDjwߡI*R1prSvM+;X*gٳH$2$Juo!CZFf<& F]Z0:_N oPS1/IՏSNPe :;? iO5>g2`ۘkCUU)$ٴRby)35!bpiNz]+S25ܫhK*/`[[aS5@r(# )ăYX&bI( ^PJc` ҚW5s`JJn&}Gu="O?YpE>qw?~R; ;|fh؜ױT;D|OrI IӲe F:lcN{Ff-o8QF6iRsy. 
_F,Mi5hWm*܁O($u%ڃfQpKkI|NE~[,Q>c@.tCUюuSrʶ{A#''huU-}BO^JKLFIBoj/|ƥ}TǗڬO+ js?tWcƨզ½"+FVqܲ0M0ŰsR>KECR8ŶSIe\Rϲ\{iV{GNUah0ځ{-8AAI >=[wρŮۣ|cRO&Y6bȏ%6`ˏeӵGYc,H9`-xQSXvVFI Њ&]G6]F&Յn2*~ME?y&`q(RsDLRۑ+*~$~V(٥1I Rd&.J3T?:"\9n9G| [m߰C(I g7X3dX_t_P,EDṂ&CXkTgqgo=q_nj mWފ~YJg: q\}>F 65 fإ\o'Nꓵ&+@Ӈ.bQoY9 0okc *HBo>鬗_35QkBoN ΁n/SnA6hIjIn)ښ:LٌejI/%:(TL>`fKĺ\Xd5^j)n k`dvR 1'BvV"%t؈ϊ}Drvu^z[?4q}+pۺMT*Dt\sZ$[:t{$o̵IovXi>)[S˱[f#zoK%ˏ˛~>oqY{Gf4f1J>•aK)wbfIDp_aT1R"?ut<砏7Ӹ@V7uQPdqb@=cSmvq$W+",z{,^ܳ1Ysĝa6Zx@ j}j|Z,LҰ~@+x,|$/y\wKmP1s)7]IT^4SXyB86Y2y!4L]4Gx_ 3{ԅ\Vn &&ӏ$5?G̳;JB|0*-j^2o$lNy\l9h`|Lit36aJ[ײhi}vm#;Ǚq'|,mׂ 8݀%e. #%lkik `ߔhνfއk:ea#^~E;V!gxra>4Ƅ{1^~8Ő-. bHvW7 iDaj;5V^DXQS iBLs)H0n&@{f% *?w"D_%S|oCܼث}p.(Cԭ.|2xB֮U3w<<#ٔ10[~8`^GpU}=^CHHEeV5js?/ф(n9͖Dfen&'kX_2v.l|y @ Nz" ܇`C?]^(׻&4^|~rsN$^) wpTp69g>4ԥ`EJ4!鐈T>uwvQu<6OvwY뾄LjK_N!:$4e%' Lp C֕Dx{FzvQ[Ibzs6Vl~ZjH1"XHgLӟʀ?ә,}kY`ѴLPXA;s %a);l&kFsS7MB)Z"Uum՘~΋rGXsRxiv"rsρ| NJ1`=MB}B$mXiQ"-BqBae45MIw#u#Ox]dWӫY& ~FG!D5fO@=\{WP.pX{3Cear*7p /5Zf6MNaPBˆ Ye[H*q ^@wXÞ.L2|Tԏol ֓+nry]U]Kdp2Y4Cgtek2L綃o؂JpG26KI9jujq$^wƘ8 4V1*\-^7aMPTNV$;b11 xg\)I95hcVj1nqíK-OGe`+g- Wd[PM\8G[J~K +`U'̈L(fwh|(+8o>:d+ǧ8%/dnX@—BA4}eW+g[]Ę#rMقOJ]E/;pOT:ܢsTu5`V>h7x{h8 =Z ;;_LE١WE.M%,l_pFKc0VuT=[>S#Q9%|idNMㇻW=G[.3a'oMT)\?ڐ0lz1+߽ga~C YKPHz˾X3lP%K:7(u$1MOD fҦ ;jnB 9F4+ݽa$ oa7#yCZۣAmܑ=R-ٸ)!_JC2z)X|{'?􃔍d[וcNg֗\«Fe'OSZQch_mLfSR<7|6&H`!ef"[ue#EՏNl:?'fke=+L.,43X|"/T@k+Mm(.T4(`nh  SW)Kl"=ْz~)X*~oE1B8 $D RXkw8`~Dj&C??}̹{B9u?9Uĥ(=3\R*)g/"aoԤlnQbz SF-cgjE kX$)1UB'z  '5PfrC>';GCH ?V%DL\ Uiv4 .\%s4^p~1!a54zQV_Ix5IÃL]7NaQXe;?өɖ81QbyR;SFf)k=/]NM"IPGk??RKꁧ )\ߔjT^ZtL`4EϡIEVcD}xMfuBK/^4PC<zrOs8/4PQ(,A5d oáWH, 6()8ZWX l$2FUb0AnR^$W \|O)nzmoq6RBy =4dK}ÌUzG E 4W"f=4k7l Ss{p9\#a-Ԕ;DNo~CS3`Go9鴯0|nt&ɷ{m-oPSaYcjx\͛ ͶG !Ԍ׿/D;|NDXa'+Ýjpe~Pfs*AlY LY~d답)ja0&Y)SجT8 ?esWQT;qrnagM6Ďܷ7)\/KXsH֭qE|n.m)J}c %-AHʇJݬoLD6.ń&!nз#`Zc7cb}EܣfuR-]Əǩ|8ZU׎+YVA^>% [^XtN|)Es=EX9\ʶGq;yQ?wcwY;Ef`i5G([߮)vpRT3A=x/s~؈ vta f._Ʃy\sIC$vE 3Jn kz,iZ])qeP?'pU\d-*u)|xj!id2VA~D8"[! 
$!7HkOaKӾ\(6!T: _X&,Zֹ-D佈2VТgb-j6莃}wq(F3b^lKJ0[}˺"#GVs'fь` J" S>1&y`/7'~⩏7$cIEiKG?i QTtܜHm=ݎVN`<Nm3l\o iQzfv A0]'TcIn7M$b4-X$ľW"|@04x_mm%RMVAn=L! n:ZџiH4*(8!9^Ff1/pø^%H9XIWֵԋ &cl_%EJBn9_ p |hT!T*49bwUߓA/ ; :yayect3{:pzt4IَtQ6KdAd:=C\ƦhqKtzu>!'<0kpQ+ vkP{& E)<Ku3 Z_B-R|v^'@5L1s,wU^5VR(B3hq+iHg#qN H϶dLI潬iCBU)߻Aޙ 4~jNT `j1?EOrx=p$ȳqBJ{e{JIdxcU).]=L=K J%xH[0t,8''bm[U8BB1_hrv{ OqS;Sٸ! OzAs?&$xHw, zlDh*j@/BIrf lձK.d!KlM|t| iyV|]|s, 5'9V0t "_jM)F t6rs~30O >Y~v#m1,q4:Sf%Y(ӤGۊ>{]K7 G;p ,v8^Z OF"N1țe:/ez na cuc. \oP9)M Wyۓ 1ꑻzxo#-oK^GUS J;.^`Eg|gaR $6KKj^KQ-.RC^'>:;@YsXRmj]1 yOجT_|l>/5\ĩ**p.U?ĉe2#Sתy9d\(Q8NI {\aQ֎ 1 j ҺcDTgr[ˊ&SYq{1[خ:Ñ>f?ksKz̆eN M}ˬm}Zu#12SO<#٘Cّ`׻`7v'tv_1 c'F)t+\55n,Dtt[ILbEorB1Jg;(t"&!Xd.85Wv} q[}^AO7 wq^0)'ԕPumyF͖OWNT)&7WA"*v A"n5236zrb#p#PJo ]8Ǭ+{"&og I뻓p>"`Ƥ]Af4 P\s~oo{t wͻ7ίB;pW\I;vgV_0i@aU}FZ>_ؗ|Hpe@%{& /QHyj#?g:u>,6m4₢ёGn0 h U*/tces:0Ї0.wHM?W༪7* ү8iBWOݿkwnY^ Ҏ%:܁'<2UT}U-7G~թLDM™N {x2t\K\Y1]55DʹBIX}XΞF&oe)4ܭj8R 3]]@Zz|W5oߕB ͢Ձ4\r`RČR[Au#OwPN֎ ifp&FQ!Asxx$3EfAWƾO| Ixq:&~ht.SUTkհ~XW4`߽ uXd%mf7fn_ Xꈄ8m*D+ɳAgGގh{Qsf[ŚP>T[&3Av|X n}843 Vk!ۤ^5i 2AaiBۺ9k 8GWF@7=Xţ{(mP8k$buX7@_ o类O`Gh'u|{<3m:Y2+g~&Ѳh{`U%(Ih!X1gOhH?1 6pu?wX1i*=,&olo= @M\XQ9?? 1[J`5Q߂X`Z΀fG xsT\m>y`O_T8OnFxw,yZ͚H!*G̬Z$0cg&}L`Pt2zVz9H0O7}/5ǨygS=FAb*}h0zC^>3YJ0M 3L2?c&|56^^­M= "K#ULїzwJ]޺řo7x;l*]'#TADjD}|e`_.z`1'XY;ʕ-"< $>nB4+O97Tr*ȝU4*;cGԸT `'/*{k\9GZU{46@z(H$v=!4Gt+^ҘiPd^T"8T@Y}F #ŔZ9f4 ~#XKn裩w#[84t7ksTX!1P4$eBZ_ B\Ss ;.Iຑ:R͸ě ` G А%}{1DY3Ԇr퍛=,!N,f]zBcy|nc[@^Y3sQ}ZX>]"؞%{.I h'Rͣjs<İC7dY['5c̻=Pe5鹶0+!")n)06%Gk!^cA Ӳܹ[l_9=jv_8~f "sCŲGXW]]+z1B:ߝR/9/Pό…rc(e^J裂ӡQ3$+E˙H2TV!;mq,5&}Ԣ + uy^,*Joc!\k5Z~mt\Y^,F5o4 iQ_d79wf{ȽY8Ǜ`$z |H$+ ˙BQ@ GaiHŹS˅T!F2j-Mc!7C+X=V0fVNkI0,; =70wjHgoB^kg耼YGԺsZ)Nnr(hYqrOìW8-}x˂`v)|0 Pc[^3Lc-7PV r)oLtߎ$46 $HtMm"IFZrƵW!oeE2A lv@0q) 9fv?ǝЙAGcop@:1EOe?oc2s}?zI_:ir78|dϷ\Tюz!3ƭ}:p*M\hd潤Рj;}=[2KGZum? 
5o:2߉# ч1S-8Aa;ZMf@ ۴AtT``'$@(h?liAJd\\za:̇pNN'\ @ٸS pHJl.jvm-YߡhNuV`q" "󝫖OSG+lxP25[k)NDY8gNʊԈc"g&g#J:f+Sr^mpޡ$rIN1^U{WK O^/9KWngٌ)wB&< XROcʼ&%ىp"4SyacgVE`";܍]J$2Yt%S^R:8{ D T~]q\'AHrtw_E4-9(16Pg$BX^po =Xfҋ!8Y$('xܘd ! [@ V廄4ٌqWIn`w֛*ֱg4fpzVA< Jwv'ٶ#B0ᬝKQ5N<0 yWu+4(6&؞t=*6%:!TK&S<g#3jZ1:CM&Hw$a!1w74gޯ.,!)O⇫8 F~4+42짧@޼t<gfko8TT!)=U}e'LӌLE(/Qv/s}P8k"5+quR Nyu)4FЎ") zSp%QnE$vU.{yOgy_(/@{eAd Bg)6i嶥jQ{ڙ?ٕbPY3Epy.mqf<L0Eki NIp0fIG,ѳ =EET2 /N#U2H#2+:܄OɲEDRdRS9}*ה٩PFJ|-sy(a#R{T,8ؔW5ե}L@+V'ƧbaBTopǨ {@bPLTn`xr S`{Y< عmyAy$IKTe|bf|/R-u<^AV.Ձ^ W\Ï&UN);b %WZb`Þ;Fi.6j鬸Vk̒T4 -Դz=gS@`o b%,{Lh+Y6 jʣܞ Elc2E(x.=r,ƢÒfj_:0uOOd· u y6uy4[{m['0t(r g~*8nB-i%!OVh=Mn!HhBԐ"L(Yyϕv#X_v{4H?)8:뤢;~M/)np>ed+"V"E.5*"xjy?\aN~̈́;p3dƭ(>H ږi]woW8"?]215'_,|M mO9C,լޕ q :_CtlL(fZE|MN\][ ^U@ 渭ʡ Wno!0bEbAie[z՟0}bjRrU{XjTZf8pf6 &8^n9"YUG Qc~ܻ纙|aul}ARJsm_9| vɝ.^Q8T#歷ڷ3Qk|)I1| 5lAnjj ܉GyEfS*e/%B5~][̅"M -w >y.8ՍXi5jl-{pA U:3P@kY2 ̹z_z9ڢpk`L`04oT]Ge[>pw:{@Eh]]_9hȑ?B|)R^t !M*'B`*'][+ը~;O%Mk7,sbJ.kH8Rh<5ah zJSZV6 ;!ApTr Сg@>#XB9SyGoWOL^.@T#|[-U>*|NBZ/q/c aC4s|س>Zqа?.6] P,vbTZ[U:hObkPX!T`='K3 Fz+גH-Q;H, `Tu`r`Y3kCu0Ø1r…|xU)&B>= ^m_?9%Rw`*Y w!4IR`0>pjy)%PPas_efIwc]'+٪P`3tKBȐF ]n3<Ϸ.hi}<9N cTS ӡϑcɤ/2 05vZ"R53h}0zRJ U|6J' JOsCz)d6]!9xpy `#ߔXS,DI%ljвWäUl{{bm[l4? lx̊j굳<'w~hf.Tbg_y;u=PC][vdKfP%m0-ʊkUpTtV(]c ^] Zu}\%E3J洫"q5N,7Dmcs&NgX!xX-;s^0Md`o] ʅ}km%)!_vnQb@P:BO9xvA3[9 &k R+_3Ǣ,7 EħG7̭*_ &0ՍX/Ia|,a<660ЌgxNfհ%!ؗ]d zeV0`znn \&!7Νszɽ S? 
UgHKY&{\Gh 'R֏~]$6=edܡvrv-~pf9m~AQ=9Oo慃\{<\-(Y.rE yrqT}>1ORO5(yW}u Lm{L".w]TW;J9g}tRVHCA[gTφtmɵ?ÝDt Xfa;=ڕIZ/$'q,1"; 嶧P72:ƪHicaX "D@!d襹gV SgCq*,>l@ 3-K J '# ^ѩz:Lk&׈ \jж]IJPQA4mZ"Fj+EPMgy>Nneȁ)̟xuIp endstream endobj 2338 0 obj << /Length1 1386 /Length2 6039 /Length3 0 /Length 6990 /Filter /FlateDecode >> stream xڍxTSۺ5Ҥ#H7 & wAjHB$t^7J](]zQA^x}kk͹d30T`($F$[ PT!4c$f04 T,PD@4HBRQU C!ahN% III(<0 8ܰ!`0H#K {{{ B('y^7 0a^0(=hB$g8/1 C!H(0Ðul$W?ѿ#p #H/ Fa^`8n PW2 cBh8׌¿`Y UA4ɯT0v}+{GBt6Ex´T`&ۜ`PJ\\ =| ¿ ~;31`pG 0@tsEps#Ik9ƞ`0( 7 iݷ43(@PJ@ 1@?X-# W}e?#^?s顰̅xMtk;_YWwGo?_v#| `UjPs_ՅAn€jPB:a-+Vp /e77 3@0( |XA\w4]0YW AAMDL`|~ ,Da!쌁GɯؙhW98rLV{[0 B2?Ȅ8UbP欁gՈ" zX]tQeg: MqDmLПg'Dl* XG.d44Zxzl.˞#wN+-n"7Z^w D8N$Ytfom%7k2SiCu&'NwiW`O4(4zgGl)ð {x1)QMmX㸅ȣc7RՙݵwۍF=UsRպ\RfAd'dPYcBA{hۊQK,Uw ^4mu gxš? D?|p{jn+Aݥң"ę7Ej:"v"7[Q$[>S 7;<Qdnef&NJ[DVҡ5r=gUw8(BJ3{9Πsuwo!!|_mTEQkWM%i݈{1:O;̴LVAOE;747LE?!һ$}MaR4͕zWd'~ 3C?~ՖSv[&-Nn䃼@jie5{左[F׽Ts UIȧFr):]JZY4%P!M?WșhϏ$ءaSzGQ4cQ˚]WV?X[t8 4"Se =y<#0lZp\7.E{:pU"U^hzzIǶHaITX>oxYPb'yq)F~Oi7&lT?ˮge(l~90qV9]\|>\*Zdxv]W}[?+gM)e Pjo}q}G.Aj`{ƴ5=G3WC*IDzZ3+W- u˳m7fHqw0LgJ+hR7RI[<]6C3WILggdgltyͱJR%5j0[0r'm>8i(s>{meǏlp|in|;ԙvgn]I0S? !0j)n-R}E:/!#G㨛U9:o۴?5f>b?^\sNMܥb=!ڌ8wnc\6΂'2,Uϼr`}Ʀk^%]q[9NJ [x;N&"- 5z.6B<{5B޾K~'\}BЄeG4lz}]g$-!JXo*T2.?`gl`)V !d~oѣnW?wݑH ]@ O7}oz]y)1X R|[727r4UE]zaEi-U'U7yYhc-b0kx'8tx.Dѳkx%{@! 
f njuɁby蕋Iv|Ho J8 3$%ͽl˾&wIbpa[rfR cG(]S6!bs~P^Ξ}<ѐ&A$㰓[v²s&>'+Su oR!Oωm") gK[A!ţըC~moC| [P輱:Rǯ.n"cd67wK6Ù_'Sp|,F|a.2))9 \++ĺ| ,"bBnUhME3ƢQ/~;XT悔 MqwQ,;[П!%7QM9J0XHtvdK.8JpS\dYiہQļ J)N|[!=͚QbY%F~=Q?cґF՛^gl᦭*Ҫd_-Ei;·'Mc]L]ecgz z 6R kSHXܕj^TQ J̐e4>c V/cbje`rbqؙaΌ O`kn_EkV2BDKW i7Y͎rK%ȑ/ɷkhԵW{|Czn,)v_-vwı{ e yѼ5OR d;, ]kA\8]vn>&אY8Ca"r7q֚啢s;<5 Ll@.Or%Ռǣ==+䂓6sS/n2~ }URڈV0fo0pj22fm˨@.g^pdt,Pb쎆DY0g+*mռ?sngS~)nFXN`fLe鳨N}t2m `^uyu'cS]0 `%O)Ĕ J(RK0)a䫌  "MO-5Y@+횃-aF $O8fh1*N>niȩ.38Ep:Z=g\P_kn+:Xh߄oqʑxXv:#-"]SY 4{r#}1E(BuY0ՊcyOB4/rky8H»rCo 27n'EPf^X|;8Ԃ&Q`YKFY4@F3nfyXܤE)b /c=u1r5|!*x]m:1LJukgsC:!a\ ݅xVfO^z3z:G/NT+t kNQg7ʯ62OWNm7w|PlU((?=$F_d2R^_EU\UE"||wp_*IA؅ӊ)AĨq\ݱD?jTI?"+!r S ;/B،1ПKfv#{POlduk"'r OP5KֺAyY9XbiD*NQz)hrM3Sv{COEW=U#sSc/$.gK!Aj Cb%\cV 1B&m.T 2@"fUR_B>kqQy'E w؋,%t=/齗AA]ޣߑRFɓfab<Șp[Ci$q6qnyQ 7(%CYFXfr9bR3ȓPW@яPHVrJU͋7p,lk_*Oh}'yIk|N-LKR}şua sjR8Ė8w_noUmNf S`{*js,W|ƩI)i"flvX=5S]j}1w,oPN5b* ]*"KzKM%)։u.MCI.LDb#P3pAk˪kSE]u.z_|>M`qX>u"9=zڳaz s}%p^5`,hoN~Jxd~;B jwgTFCVclSd,iRоTsIXa-s*:EG-t>ğJX"[ss=d_SK hǧ'y~{j2K` ÍexlTI&yʞZԁ~᪸ nUmV}BWQ9MD`Ͼqn /ο`i$TעKr3ݬk-=mxA] Hb`#b\ ^y)Dgw06|bNmP`f&2E%{ E{S0d3)Fy!Pש݆mO/O&h@*-.>͍$lmKPYg5PCk-Ǧ *\Z&_&FLX?o-X=8~8 .+"=`Yδߜ7W@Ce+37q㼮Tw;?Fz0| /|;ܘ:o) Ds =K-a鴨\gWE > stream xuSyQa"AXHx\dDg"B+1+|&WY#]AĆ#t rt&TA>Z4s:¢gBvP#X4L,SB ]3i̜!>@͝[q?,fδ6Ptw'alPXp+c62@gH4Lx`Ѹp;џb B;E`B !@5|SGa5 V ku^(o>H0fn_T06x)"o1WB;Blľ  îWALd3Ep?5wO-47˝dq\xӽsiiWsYw! 10uL 2)5,fμ87 `px.1"`P @7C0sN0aB0 Q̯4xf.=eςAp+P/AIg'ϐc0nYXm,Zn+t^fD6r)m`9o9L{c" j湥i0=gCT~Ф5EkcϝWFWO;T&#񺓛Qz|%1͏(u#%[҅S.x^Ѡ[ꨂJvU}E*&6޼d(۴dzt̬]ӣ뫻5S^ّX}Dkm60dx0t~zli^Kɚv󶞆{k'֩#%ILf=?x$6wjVurhu(237k<]iu4Mтָ'" ^&?S^PZo#fn=q-ޞ'IS 6Ɖg'v5+:+E-%F#/7삯O$1w_H\W8PAݓҨ@BT9>2hZJ?U7[qf*L&\꺪#oXl-Aih\Fѹw)}ʭDءx5{b 2+: M%w:~uxe[ؤ=j*/ާ z:V]q[e"Y)sa@&YDtd[~Lwp[:eMY1uX|ƹڪ~9qluL,a$+o[{$mr>[4|x~p7>Qi\XZT< 0\8e@<2}llDUޭ\Q=D-)p#1ve9k|U\3)J)}AؾގWuЉ<گ4kli3[}!FW7=81&A[%E R9etI犓%?Hd)g֍{}:drވ>~s@ҞhReQ? 
{#nq69WxKKԇn7r겜p=*VmI.xu$ #c|?M>ՙe:Y`{Yt2C eͺiۍ{6i8U捞5 K֭^]%+ ڍ#VE\~E"Pk~%lLs+ęyoj UVHF`iͶ8QO 6kKZ$M sSC] ąhv~B1Ja:`:>LcKRa-4&w([nR(UK}5*a㧬'R4>o R:`4V̷(2語rnxjo \s͓T҅ اPPhy`#qRãvEjA fR[SiNuC%eNy՝թsG9޷h{cdE>!Gm,)hi|-M7Q21dՈDZêhEm 쩒\h endstream endobj 2342 0 obj << /Length1 1626 /Length2 14119 /Length3 0 /Length 14967 /Filter /FlateDecode >> stream xڭeT%qwwwກKpw.798ߏgg̚U֢"S`p4J;:11Afʎ|LJ@ ௝ Jhrt4utI9LJ@ptvYYhuOzf[NNvvWrsY2#i]stn?}@ K`D`Qvt[@2 "?qS -ngljw>2 @CnJ1y%gl,'fw$bVeaef* Z̭v/F `bce5:X'R<qW%pvh;z|ظyLl_B|ܬb oVu?:?kf`w_uv?y hh.f5'˭`lFRh r,ܩY8ޱ?k._[C8sGʩ/y8v4"" b-n!,FYg:q+@qjޛQS7*!pHDQE`- :fdr|lz!C~"<)n@)Y]֤sEkWe}`0C潕[{t8qrI5&gbl*V7V.Fhyh0&RJ/ΤI!v&.7a߱޻p~ (l T3e_IZfi#+ڞ0 Y#z'B7%n׌w쁼m~ 7*V/wVL[BkD.H?Q%5!RBՎjN|V?pF 0 |[8{ ̯_OH3b|w{UTb v%Uq#p^XnQq#΍9?ekr ύּ`&8' . 07Jjh(>*d m!I \-glb䬕~0-) ~ȡFx^]r_nRÎ% bn dȷXkp ){Kv& ǩ.jWN:?0&0fh |:C ]^It&)6B".?(E;5~;UKys%ta>lxn몠 c}<-97Jܢ)Qi [uDx|쇈i}!}o,N\Cj5B!/¢x7Z3YA5ONEG!/^fC q@8 yVs[׏^Cs Ի[${acYfS vIV޽*~ñ:<}1lO:hg`\fCtcKfWRӛR2{╚9qԧ=k*=ˁMYkr}OhÏ{*hf;VϢ+cKnK?ʫ%pT,?uҚaFkʵ~rk>X=.3|!; On7( ~IFC$T vMd)YL sSGfv6vc+lT3.ퟰ7&I>U8 O G7N^F<'*uу#G fϛf*׿u<uE1U?~}zj_iW2ߦ[IϯT{d0f>` Uo3YJbl|+fi`TRKɸ"T϶^7:/L$QYf ,:(3Z9`9҅)BS xJ~u;ĸJݢ9@lrN>DhX|vR9~㑺t ÷YfcI3R2d5er˖5, =9Va a <_;DkcS{ia` K``Xuj!s}"BjHswqR*9mXȄI^:b;D88Dy=` H=0* 8čjKֵbd;B9l߲nj0MRU~v_CþEȣ;4 zZxhXǔ!BbBĠFxpm)꛻+GYnWHep)cHd=wTwI@AϪPAKS~5q5gF>c zNJH~wplqi3Nq;t[hv!$ 9%b̨ײK^mm.خ̮S wQQ+$} (\q =$tVڙ ;xk&o[ szANuv]Ok܌#ܬmDMiE0RiKDנ䫄 5$"npb횼Rz7H*g1S^.D(̬0j|\]ٛJzoBmE01FHӒC}B3S65}ֻ}9If!|'=MZ>sck읋#٪ 1-8h/{y(σ>٢#L= >r +8&Wd\ uQnG&7p&v^ڊ[%lI*eR RV>DhV\? f5vps"3R8P/8MRAtIy?zW6sA%\4s:h=\?2@z YUpKw<@h ~˒~Ψ.ϒ6 %β_w.R$uy#~`m# x6z҉KL&M8 PcN. 
lq{:bЕa̢ T펻+Eb3q?14ҡFӸ#ц*)IgiŞIf|IxjNp~Wt6u#( $|"-:~_lug;R0YaIn _$D@2JǼ 9>ISH|^KC&8;0vur > ?;`"IgK|`S6Ni BOY#~jR#A#u@ܚ:8G |`ĀJ@lw4r\6ޛwkM_'OO h]u"!I2V6{a&UeN8Bή".l L@<# 9!-Ю4`ceg\Z=SR>105Tx^jZ ȕ(]#}i3z RnI[Xvc2@h@ gnYlZ2e@(ODF$p\SIPezSQ6_/uZ/'aWMCtcSo M4΁fN\cd"V\7)Knb:sX!;hGKS'A-om]YJ֒zoqjBeɢ(:ck,2 b8ömhp^yE~-^%Zw{en\KfŒ&EKaGNg=R<.{r$F8a50wk^&EOķMi_ "~}l_V+39\L_:DzLA ՓؿE`5 ׏ >HkIQO@Ī"Uօ="4k`CQ ϑHK%8 %^C{uIw5}GDa4cʙG X BHND*2WRԊIx{~G(x#3^D؀GO]\u}#Z;}1^vTo] +`vP'ԙ/Wqe+,g3/J=|R)P[#'RE7<F5\+MEo^[Su6Mz"zRa-ҎVk&ȀO}_g&` NLva"vҮx__ԮBs&1c[PW2 kڜ_v^6ե#ˡ/q5-k(Rz3o2It9ФΒ̔>Ry|<ؘYn`YhL"p> d]9y~ԋM V^* HO a#x* lȂ fr _)sB&aCΰNlh$1"?6Af!XM9h cLv$PSJ8.\%'s5"Ib<$%9ƙ p.=YY6+S w*FC{^68 !i:\hjAl4gI=muuf 4u͈3P$7.=fqAj8Y#9ؕBKH>- Ip -:x|m)|fyp?GF8_~zq0F`(ꦎ3qN^*1r} 2G)YG&d}vm*KSR&ٰyT3춴hjq5S8=m1X/#vck=_~L  gjGm &i)IxCg} ]M[ i]Ҵm2|MǴ_&3#rRf"c^֕Hhh?=#y*_R]drZʀk~JbΏsA{ڢn!ezX!Aۑ }Lh+hUl,Q, sԅeCFafOMqtMh<|W/7-zsQ E;Yc:; 0,gPKֶц .>}}i\)C(h_ "Ux9h?i'3V j`x&{ }Qq:bBn%qR %jQ ""hi|u_>յk$srcnOG>,2q~p>b! h, 8>.tUfi&f hs=[Ƞ]8{ٵT2[zWAdV|I>Pmp8{LZ#t&( G}~[D"պ߭f:2\b~)lȵx FdbyӔr-HUƟB$悮K$r1}f sNpBp{z*i''k!Gete a9دx֧+8&IH0NE1^}.At ~4/ 7 ?a"3">r8vRzdYix9t來p~!*2aH(֐'}a5q99w)̦VZx1w;+<4pLfgx)c$'_'̓1C)y[2f^W=B*2B[Øh `){&4bG0U[TH}U&&t]~3&0bcp´eS4NSϲ~ِl#R6z\t+vIo*k"㹆 oagi?@w'w%c^T$x`;xzeZ4>W9׍7dpHrZuP^`uy!#=r ;@<|ZBghEYUeGQn耉wrcy4nj7>P ؃gӓj1Rbc@'I#&ƕ_اDw<3n-ڳ#]akvI!!^,a4?ҏy=%0V˗ "*}SΊVJw(pl/`gT!]w;uPs5T'Grm~@HWw2𸅮Y ߀NYjN 5#_N / ɷHZ>v6ox[8}u)?z?~$V6'GyJ27ZkrQbՌy0 {yD;lf&T1JX.$:(!vK1YWE8T=Ku1 q r1,H[e'Ktk%_=Exhh܇~aTeVlm ']rmf|_XƻMR~)BI~lΘY<+PjkQ|$wHR9"Rjbm~:l0/E< e^ۤda"zJ|ȍt9;h ~.M^6V 1ACޚW)y4cmgucw6l- 1^\g(pnVDqK=Dx#WTFA(ذe|\9e@ P̓˲DW-(OMp-! 
B@P+q*O~Aw䀍we>k IqxUT^o=oKRV .7EKZZ5x3:ǔJN ~Myfԅ^"&Dz/1=QIZe+ _ ߶5FP3qm1"wEަ򶆛*<0ZE?Y\~4AXo76WHfI>ANC@Q}I5s} ?deʻ41O~pEý`.xwQ.X9A+mgΒu^cPq:=s6M2 ۲yo( ¾ Z"0UGI^$'4mɐDǦ֤(&e]жt)t\dL?0}V1k3V] R;M$XbFcP٭ٱ^C_vsȓ ʯ4]>f6I2dGBD䔤C6"׌) ?R<{FZ1K8_P @c09=vۉxWpzUoOQܨq\omP1ol>oe}LiiD?bJr)T鳎+H 幻Г;c/:jux8y|=U1gReqNE]% ,xHnp큅Ώ<6k2LFŹxuA^u=fInL0$(>$ib&U$Nf&m;C VJ>WK/{z|a%~wr̜IĿҜy"n:3| SSd`)ROv!@ f&#O 8Lm0a8.⌟k4Vs_tb9 9-#Z[oz~ nA>Q63L|Ð@eJ6keC5Y,w{"7v_1qظ#ո&Ź~`˽ٖùʵ[8[6KTTd2IR5d/g8AZ&?;{ `6~4c)t/̎R5d)O`d|v!unqe}s&3ZlO20EkܿF;/Fkz@h+,{U[|u.]))/b9h|x y.. H3g$&GvCRP*JW U雪39,;A:SZM e]b/fp5flLJ<w(UYoX#c~Rl&x(KY ЭǬɛ鿂"0Qon#cZVzE -*M9z!]= Pѷ,5q꧶S{;W=K}RUQCG0d,{)]uAA~ж:뼯ķR. Gy%RJz+3Nfxtb1Gqg"<;fdty*gMid,#ӵgiļ 2Ic877_I7U):Gcӌ&r6r-R7720ub8xqP sklQ-=y_ΘbAkObECU ZE(xZX# p>$T۳@,t8WoN Et͛lO 6/8V:sR,8TiC.byr&m*Ƞ#\#rkS&?FZj8٩zr4z~'A[>;n9 ^+Dt,VzD %d*>Pl[9Y5֨,3ls# )8FHz^a Ո)ޚDTǬ^vExIgnk;a+]&tFkfʭۈ!PCs\L)7T|IqH{ά eŠ#cjcTך5 UPA7&i_m%͞h-%ױo03O L g5lZNW:8Yؾ}=VFfK|>+ Zsg*kAR(Q6A:FFw 7$tH΁nQzZ>6~eGJyv]lWñʹ&2D1!S^gcgsz3͹ugj}(~v٤P&Г[UOYFo'o)ϊ _j:\rQ^YV.QP[яWtƒT>j <04XQJ_cF&U5&/4|=֒Fсɔ lA.V>F嬫'Fi"gXk" !F]R ٸYlsBW<-woWHPENa4KƵ|cC5Vg.<(*KlcQX5Ư(^o9.UO~+'RU"A;$luyDe'eaKWc`^5Iv꫞j8;ؘ7& D"#kڃ~÷6̰+638dӨYh !7f1F\ Ej#3Ub>Ye ~.:7:sgw^-#I{>k5o E/o|3> stream xڬceݶ%ؑ gضm۶m̰a۶m/s֭v_՟c@c6)"  =-@QZƖ]FW CJ*`dnk#dP560A[;wsS3'*J1毧 _kGEcc1 (+..# Q;[ R6[տC[#Js8u3v34GE 3v6wt 0w:8큓-Ml_ 뿺`rNvNQD?&-l )_0N6'c7b f`4mL3j_t?/Y_V3s'Gc+Zz1 65Ϭۘ-7rÿDPMB`dlKoH˴}$7PB oWKz+]26{ gX;|ͭO^Zĝ/5tt;ə;L_re#c+sz:S237mkWEdD7 _rIoni[yF@ ICa`{& / #Jrrsxh0sau\jVFNTv}_r&$yAOd$0ާ[2t a(L&VЩX?_aYH\a1=j\M#.dBV!X7ScCJWWfe%A-O+iw&RJ>6?rxSk,ޅroR*扑aJ8DX0ll֌<کjH`K<]3a쑹*秌EX" ֢H?2:VpkDbiԿG[J}Ln@ $UnEC{d.Vw,~94y&ci5mL@dęXV㫦*Mt7I庸)Yn=IeɏKFqJzQuWW Ie U1[#Y9 =ڡ?caPCB,T} hl'b'Rs KW*tpdkf~nkAs!_DЛ/lS <*s*ur(tzW J!0ѓ#FQNcn{jDO=bzz%)X+Yx6yp<ܓaRIFT% 2CVHhD*n A6cb0ۖy"AF'[웫yf$0n##:ƙwT(h 6J$L-3UbϪ>+`jh/yTLzlX F^r~s̍.] mi]:sGA+YP}9b ],oV/z. ;K%N>_~\<_}/Z b>OZ>X]:?4rpv$FH#;" d'֜p24|]fAWvI/HXJҝx{ֿۀhc7 aVy 4W=HZ?(o?佱gM'u dWlw0#'_]ihc/uJt{M!lI٘raW$J8蘋n\ܿetNkG/m7Tqf$s. 
B~e(?|U}iDwUGO6#U" \S)Wg|Zج.s9e/zj1\,S){G/a(j%êG 9LT \`YX e½;Lxᢒ˒Heljb'h:? `u+=#g^6/Ƚ.G|oU9U #!+K V^gۤ;2E y= N N:;qq)? oS+fZÙ w#r־ эlւMi+}_rh5(E^>#ֶeygoDVPǣw đ|?&(hǻuaSaoZD 0떋_ZVyǮ*֨9>A>IyV*Ŋ;QMi_=@kzʇ^I$Q>LPN}[7-v?WM}ȁa2 ;Ӝl J"+<6.,qlP%l< (Ȑn'JX̓ucϘeݐA8\Z%$ !N tΣ3@8J@,s>wx]\ydAH֭$/eR>R=uer!"1jwe'%6Wh=)=> ]E!5\ҹ8Ke&"FiZ[ˈ|O@(ٛͦ~ .1a6 ̝HЗM;lg:'54GuAi2/=kwLI\|ӊNx`&L1q ݞײv^&],b\*N%4Ar>Sg uƝKs[Ew1aA"܋K:9'!Fm^2P/ >H0ށƻ:rUx$\Vo?tC.c`9IHKG (]R"z9@R qIvѪ_(5y-MX%"TۼQʮD6%ԯc$ה29fRoٔJJYw^=#3?x5 V`a9 :^#{dz"_= 1tx?Ln {[15(\>`CZK~)i Ӆ YƜkǎ:t݌k_%&-q L>Tk{D=B畇RoRo̼D<E!HRh.7ɾ>5Pܧ^.h_R]%>Dz4;d9Y\n $>ݸ cc^\c|Ns:|ZŻwj;'X,3آڣM0A[u% tM.-'(6YֵEf/{jp@$b~RoW!/Դ{N%.8 Yl)#ƇNV'Ŭ] TtkOC9 n,"«A x!EV??XotTв# d՛OV{c数 vNd{XjTIf,S߅E9y9彉oQ,l֔b/ͅ9nj%aXu1Od.uD S;fM)pfL-#b5lhOOtVjd~6 mɦ\-aHyإ^of}݇~ { GNU: :6H9<괎`ZKY%OrFJITR#>QL)< ?(k轞odE'/Rv0GRba!I^>UF$F9 IqX3EH(w6z1Z[vG_rF/ن%.(C})/m> C$y_}t80' Oj |0<|M=?IyQ`e1͋ cVj?C&ATTV0,ԓQB=MNbiVMs7+RQ-;786"45vN ׊IltrCdzNk[txAQYT1- BZi'8F9fFl L<&X jʼxL}k۹W0y{q+̓!'lxil=ap2d,2K}o_RvMITkKfO]}53/c?Ep ,.1:BMshM.<ɻCOk;X0*?}]kWAeU-T0ʝ*qWlMDH3ԶV V(Pdƒ07j[vQNcTj~3^aO6E/3 c7\ @'ƌHZ5ŗO@c Q #Y%Z[":t3kEm17ojd2'3B#ދFbwk n]#`60}f &{7|R@jrg]]"zh˶S5+\Kqr-nEt<37}exZ)-,dڋƩ$ujx֒> WqO1U>yn?b8 'hc5cGfL==䈀nOsKV aGBiX3Lh"(lsEm DӠdr_&B%as75$G40­BZ<: },qHں}q4hԞ/"6>Um)˵wꀫQD sJ+%t4H4nn/MxDžŗO5ꠧgDv3iXRI!&0[!4Au40%յ3VDY׉vhyX(N+ ^zuU=YhXN=ܻeY`[GkM(S=aBRmWo>(qǰ$jLşLi[B/]#ڭ/g</^4sB27CaP$Bx; S\IS#"`)4 Teռ H6nzF0aM BDbIm㳙0_{FS+W=Br?㧽m }ٖmէݖP~7ƉOK"pQjNohU5Dp]eƻKH8: *wP40^⢞J2cyȦ%U5B |eM&ca{Յ7lU.)dykh<$*^XG ',1{#'(,(<~LxiB:1Q*ϗq4*C$ mp$I+ AJ9-F$ⶪ}hwaFN VWqv .O FYsGN/ʒ('eT޽ާNiU CFO9c(`O'毐|]-7sEx47 Q#݃(]X%Dy.Mc#ty+GTv%},zm'bj1+:l k3})fM'OlZICu㿝A♶ϟP@w ?7^Uds VB#ӛۏkF}lm T~\jv@#eBCqIr[|95ѹ/jGUY< t/Ժ(dW fo_ m\QƢOBJzSLlN3D./,u{ZDX0ɛD ,ؑ]HN2ţMQͥs&ZC ;&i"9P\6P2 w: !ctQ3~%$5GVRVy_7)OyAϧ@͢͵nSM}:=@(r0pjy\qgX'q$EjVidq@Z//ӏ/eC97-x 6/\ ,4tQ?_;RZOnnlQp "ԗ9ͱE8Xi6eYL:!z9 ;VPwMD}_ufulD&3hzfً7Q_("umu}{2!e39idw:bBIl*8b.*bSIcFmRXLꟍ*le /I\{qI1 ? 
*B.ҞI0+cQU8y|>*g;+0ȹ_ӿ2IgAF8=DƑ5cl& Gfߘ=3F텴_[j݁ rŨqlf=9#X/[brFk5<8Cb`DJsVCg G>Dh)U҂d=9C)'3h=$ ۪ ks'II6Yt %j4F\qF IL<8 W-N8gs(ϩŅ)h:%AcƟޜPܸ^9Z܊O_̂ nl &gnMg| M؆ٹ「_Cx*UTEn$n=}m :h"q3/.CGq$8 a El$U!bG*'Ͻ|g43I 6^$v)% Յ~;ɗ@"—_49j Y;'sՖzĝm"6qr* N":4, ~'tdOwu]uiVݴydԂ6!] ǃl5_ɼ5%zj l3.ȟpi•a]θi̘uQC?-ѝDeUU3`ܯBυz0m`Fv4xj˒O3Te .dNɬhiʌ)ƹ)ykXD`1o:hu2۵ R1$79_x򮴥ήFU0ԋ9p&VdscU_hA$ [fC-1.H]ׁN4u5(C=~ծx+rVAȼus&"=1L1Z,H|=:UI2$"B+8KNGs[~/ȭ=h|b\LmȺdn9 PT/ιQ"- N06p|b-< 6@얋uAHCKF/:FH_{aG~BB.D6Nƫ'p(We^ LӂšLGhԦ4iyg+"4,z$Պ`jGh>g2 mZegPIlqj1xUx-=w~A. 2/ e3mD .E%ztmPԢfPYɰ9b8$/XǹFf5fpȑCSb^Nԛi+ƈ7Q* J{IA? ƸfX~ϕw#*Ѱd#C@pOq!u-bG\=vwdL|>i,N:/A>>-R-bwBqT*jg,iA^MҮKF,y!{$DhD~ pxC>Kݯt,]'5k(3U@RD٧B΄zy61zlc:XuGS*H?g:I@Z~eзo!`ns}׈Tǻ|zfd"FR }\ܩ\5M _JO4s`!7gy#g}-ȅдEEڦZk\tS 8bNf~ey?Pzi$ u %בЍ)F`fWUazRBBhrVٻȦǪzWa:bh79Deb~R29'A :sXڸx|-*RgPKH*yM}7a/W4@K 9'9Ϻ)g%q.τ릡K3@X "7vb3: yT SP@F!4{LvLQ~\$u{\s'E*s[a23~Ҫ!u| &].UYkE_1ⅯSyA^ns&ߖh$zwt[$J4o:νX J7 ưXp t]eP+.V9^r@s^FS-:8.#OhӜ"F7j~x]-$@MUvD鳇s-2a#fς5EN5v}vhRGȈ>eκdɊriN3C}y%yS@#{Uǹ}PYx=8)82}Aw)x;[:J"wo&=N<l2ئ`IUjӳF+4R{\>+9-.&˨ M7'"J5A[ty8O `Yؙw6V.o0WUS7A\OdpDRt'Un>|eq]N֯~z˂= ܙeg^0ZFXCqjn`xJ"?4ʊG`l`NKH :L=3cY{Ir/Y 6nte.TVyy Gt겾NZQ-(hӃC jG!X`b3vO;V}r&7Kw$\N6~XHػu1 M@ _4@hz rގ+M)2觫Œ\(rW6J\D_}Pir -pq<\|# h92l_1_/ZMW"#&M4505xrOoe,+LZ$|.rfJָW\Kdѷs/ p䗷|[2:˫GxE?y`j 4mkF:)|zn(yR`K_=7ucjSR!t!Q(}Nh/)-"\M3Pe͘fKV,U~dn+QG|UB}hKb xJ&(}:qen!f 0Wi: p3hspb(t?`8p(Ry-;>bSʄ /sE="[ѷ] Bm/𻽷E!ݠ| 3,"|hâg2s]òv_E +G@W)MM1[b.!n8*cT7M ܳQ oz=0t07wl֚-fJxq\ VjkX~[$?=7=CEaMC.rݨ$B;X7V66*g,9gE(Aq;?VwN%"`lEVQԜiJXRP5 AtzNdf};N?\Y/̰0Z;ކELrfb +)e*A2pSТ>t Q4\ 6yZط³՟(enB0/2iͿ 96M3R&N0ܶ$X*IGRg w(Gb0ˑe#ha`k !!E*Hs"+̤^^!H(5];Z*jGR4 CuVI`Ҫ{xC޾pB2CV8lڭW5`d/Hd')vWd;&m]{O9":quXWza<*;B#׬B#&՝<P`^*Fe#+M]AZvb.XKr2}oTj^4fѦ$S{(] gs)s[K{ڮz]X_JPLuIǽF6оv-KSqf]zn[gW6[|1܈Ɵ րw e[vv"UPG!M'YHP6B JY%r" q)s<#bX@R7pfwN#x  1r @I'kƭ蕅$׆0lJR DaÆBv/,W8E0B@4ct^<{=ʣд@l&I'xM/!-cZ(cyK<%\|Sf#J{u7>5K=/TfM"*C:yv Cj As:?\ssRRuf/Ko+G֔Xuo p 6<6և P~WqFXv;e1{"8 ݭ>6HH~Fct,zi|rb׷//T1ͮ3+5rzNM-8 7=ϐ,uX g>aC2ІXWyԚ/V3Lz&7.w@W x(r=-I+GSdi 
/@*ZOE6X&U&gPmgBT,{Ntߑ:ihbҟ1']A wk6 맴d~ }k .d$=E`Jizw}uKgډ-)w%y? 5P4 s$)#Q6Ӳ8b~ri@ψDgo>JuBmxyp XEpua9"\Jɶ&;;F*u]}kp=/dZ<'f>тuuG~*S* :/Q:LNN"96ܬX W#Gx*EUrmmf_P)L L2 bNJ E W̺yNx3**<2Ҹ1s%% :ʆ¡hm_~wͼ>!5ᒚ\jc[R㋩sCꑼB;*VTȋR1^inXBk% ! 1߫8'Y l|WbcwQhCW"\ouov7#-Ϭ LS`54g&\J(Tx8D}V>1vx BW|mOk- 5BPmR,>3r o 8 W6JШK+S9QY"}#O?^C0VH=~dfE fއTqTv؛̔dQ3Φ"L덇F>k\B؆k AY!)z {s{> >l3Hx9;#Jk=k|⡰9i$}l1;JZr5 } [cz-uKT4D4S6"LשFp{`=n#/x 4fq ȏvC(.Aޟ;s4,l Ab[mm2䕁gҁkz1=3W7_ 'Jrl(a]Fݧ[&ߺ^ϺX Sy/e:h20'zE$^qxE+ cC2f/ǭ "BʃuW/DK4164Dk=Ysw6i}}d x W$.:}յGX |Yg8;~;YJ.i 00 v~O"94_g`E04P_gcކ&BM5LgTMs{Ԝ:`d3XYDh .~@kid ۃh=!3nPe %Snpm;a0>WrG\Ѧœ@!?_29CY=KP\3'PKމu0 eYg2vʼs*̠] ;o=)>1g@odž؊Aㄘ)+ ψ&;e '\kIc4ș`eF]&R@}DWWɈܣdw׷?8ژWXowd յUOx)4Q7gcv%.•ϫ_o*gnVU L%b}m$ ;5S? ^vxB4<~)*2 y [n$H(|ʮ! jN,El Rᾤ hLk`L㚅gS:,|X x)0<_쳝+%>rS@QҴ3mБ善Kh}[j+?Lg4zo# El==wp$4Zπ*nF|sr}fϖwD=(+թ/R+ :TH3(KóvJEέ7RBhO2?$sЅM8^TKeh՞#īY/ }dmP$U{i&uMòOVtՂA;Vrn4݈ňݑcbfVf/0L]_S w+0Ĭ_sJf,ͨ/@3=.az9g^*Y#ג 1*6RlYUF^r)REP4,ě\ptT 8In6pGź0xy9f]Ȉ~4ͯo\c\6OT#Ci+ǶIdM2T$d|SH ڀLh1wI hr 'ׁ/ť+h)0VϪRI=Q%=D̏pCDGᚘ=rA$"&҇} <1쑌ƈ\-fMWlZ"`vhR0Jj?wGSO3>[A4r]1qwN>4ܡ=iS s@=9 8i[^+;RRi36^39"+:{""9p9: J<Gl' =+!E+%f)/ d^l-ej";s5-S3_ѥJÃ=mmρL%2 DD^7dl&pz#܂բgφ@>pk 6Y=L@\Pj :1=IK[V=Bi/I俣m}r9ËFe: yF$#ꨱЙ -y"SiҋA@ԑ"a&.к*Pm3ㆆ{8xv՞Ԙkivd`h0 N#C]CEJ{ !|I_tKG:fIuRaй'U`*_e|nP,dc|fxVrovIKn3@wVr_ֿRNnJo endstream endobj 2346 0 obj << /Length1 1644 /Length2 10952 /Length3 0 /Length 11802 /Filter /FlateDecode >> stream xڭvUXܲ%Nww{p 4ָCpw n!Hp'8M!{|gf^fSkMAEYl ۻ0 @vj`;%0nFp&.@6 4pp`O' NSM,L=ytYh޿mv@{wgGu bXl e]9%&@ht2ڂ 33`v0ۛ% 08;@n@3_ t9;N&.=p@fx[I ~{'S;89\QU$Πwxi6s[?;;bw=\2A&@ N@K's[;;Wz[@.@[ $vf.-AHgE `g?07? ;3Im=@ $V%{H,}"7H"F#}N-jkdb>1%cbx3Eck؁l=o^~[t/dr.&m|_Y4WY,Ll]d OklliXl /hoUKO ZeV}6#v?i+P\ff0s𱿿9|!bNo4> ؛fNN?!X XgdgLIC:5iԂ{3"BYg^;<O^?1/ I|)07h:yX P3Oc/+liL>Ñtr:!\>PP;7va4C`99IpO;4>:2{ ۿO̘H-hz!sbNv-fӒ+9]66{9'iExrGq0Ĭ'Æ%ճFa 8asLe1+lDsY-6>LU]hY^"j)ahʔђXN_2S!4s,{ԭG6QQ]0Ә{?j\q"6+hƬsأC)aL.#O}sj֫ݓ! 
b<{4,_4+sQ.(TǕwݴnI?vqW(Al%.fajG Bp=֧q;#VK6O:Nh}();xӕux'(-*hj=D7ń\ʽnŗ׾TH|LY7bS1ZR4HlinՖOn6 9`vco9eΰlcBd ]`XQGƽG]  VCFX6iF`* e^^w~\P[r˞ǵ#+~RߚQLm{p ,1cP_+azߕ:%2 ?U2 ٩u7V,/77$_8Oṟٞ{">T@hgCͼP9AHl>ɠ2&lӠWUx9i'2xW[ok84yyֿ.sB: IỎ˾~ۨ 5iM4GH9WmÄ0˸`>R$[9<';/Dd/TRL9 [x\[uN}hw$ ~Icrz d/) mw&Hy;CWz,ے6cH~hxeǘN{ 05r]c(~9ٲ_ ˚脡˸>ӇI:ھ|f^v C|o"=![)Yd3fLg~}xl\6}0!{ 6mYv*d~B!֤Qe_12~P , |_>3VK=fwÃ<?9J}kIqKgH@\v Ŵ"Z}?xp C+1-I\~m(fc g KYg YV dG[`['O2hxgS O ]$㤺MȞevi{Eu5X.#t:}QʭW `Mr(>nJ$W$C œ"{k nózP%k5ΆפAD&(YKjS9Nt8+G(UȎ yM uͺMDq܍%M϶Sϫ䧘9]n~R|Za;姳#@Z0pcsCj@5E![$H bBڄO=>Ym[b#֘ð\$(a1o;Jͯ(.{ͺg(Cft&cdPȡ|v>hͅ_'/ an c}!ٜńig'vWRC| 8cE&|1-N @YqKC\)Cxx L.n?Q"l%9uaݏHZdT<~TʭtAy oڹo,f61qeMWQӽh80Rb&ZM&tt38~ %eܛsw[ü9/@07!4ԩ;u=_ߨm6\si%wO<~{H8 X'{aRo 6 # ߝVԑ3 (E93OeJs41:|G?S,[g@!-´UUՁhv se4y,\P?Z;(XV$/pn^I44FU@<2?sѩa,~IF_;*-WS ٯj{Wy*J6'W\kk@%}Jj/Y{f&:n^A1L?}}TQW8e'hBP-ݭn+Qz #uiP|XQ2Vny-.;YIF`;3HcW4d|?2e\7^7[luRA} z)d4#"2w*ny[6PxrzQ2/V@ /x7λڸL)0L&')F㧁Eŷ-bYgʃXvr[#)//fMacf%Lͨ}juT.#|# o[ 2 / ΘŦ39nP{lgjc1~Ьh2<' } $R#ㆵ'O(<H\&}\yh="|4,2|R;P;H_mvj!b\iX[WHiR> YZ'h8)X3kE;S)KW%K">EǸ\X*")*Ɨ~XR)+֤6чu MLO piGa/1>dmKJ@עBMx_ ZXfnޫ-\;SڄEiy2qG+r"-(BDK#/g@ Z&pf rQvkݩ=&>ܙ \MFab]iѫoqss[-o5~)Sds%H@PY,M]NIl_K6Bh ӓ.(ʇ~~)\&5ohF/Z~XeCDKUa"![]0T (=lZW3&(qD"c&W eNF`ZJ-OYF+l/4Ib#>xi. 'RBԉЊBbG[u`5U0 vyڂd`vC+cyjB7G([ʝSi- CCÐD{է[$c%nr;ܨf{|XE !#`ETK. 
;o͋-<`2!;I=`#"` \M öb,HB, r<]}WK*K7EM+ZE)ߎW4$T*ӹ*D$ͫ~h&]GD?izRwGl.>b;~/՛{U`Α‘n"!*d7Ĭ{ur@zn~֊K5 ![Ar6.r铌qgx 8RgQ+`\%!~a%Zi6*X AetXM"֭&64:7N'Ϲ(M:*+ 9]d&Ŀ8w4>^&2+d4} 8;Ød%Kk%bv5q ڽQًOI:o֞#?/GU%L:%rxo?$r5%q~Cًé禕Ԃ@^vahҸ|8ޒ߻0)JL^8/A?lK+)'UE]U|${#Ea"?rd'AktJ "})R[?Z"oBp@~IJVJޏ1jYJKb+`eV;=^هrq~H)"T>;:qE]xB12mBS 2ńra;Arќ'tTz¨jBzh*: ůQ6T0|kgKӦ`Ih x??33AEg~eeE@Vn,m-^I(f"jÄP..-X5VU %5a9AۤV~Y++*7|qԼVOasS]~A7HĮ%0ɺ X:n— %ֶI(|M$NWXWL ۸^*r4ӫNtZ垙f j= `YNx]t[g aJ\[[1;֋( @Ѯܴ% kZͷP02rh~F8 AtIA+a"iY|C('r4brNE`1j{uUѽE`|`% GwA2ԑ%Zj1Z-Ŝ zFyڱ6৤ѵ{H:-Y3ycwDaǧ["*2J簤UC fq^htʛ y4rW$JuLAS)+P mL*L-JWFaZOQ!8ǯw2%(պԓ1 l/?>BEU5<ӓ(jwͶWʽζMr/NsTmH|pILЖ>Xaݎ%%c:p+Cq)[_cȷ6JP+?u'k̄sp;XlɗgNGdʵ4 j(Ȗ`d>;AIk+zf6ъU8Z}+`?.sG c)0]-y*D`ղ En|t_m'L流מ! rܹD0?. ^}о]rMŤ3A$Q[No:|/ ٲXc>޿Uv9Jdne/Džb/L Q/&(#[,x5=cJK\Dxef9%jQ&L#֟DZ<+rKRF/r 4HvAc ZɄa}%&20ƥn?1Dg/Ocv?Ώ{aNN/3x'Gh7D4}?]aMn!G,-=ͧ}iI9rB,&1b8OIRGRPq>$/KٓPrNwWf4ѣuR,iDJGMs"yGvW钨EEwwv(2g#MK'/gmԌyÙ"an }Ck-(Y!LyD D rտB,ơ_5_:GQyѣвDaznbM*J}iHQxBզ43f*H NI瘵NLV`?/tdҩFle6T\z湡̜>$вZ2TWQs_PCŬеB[ډ3214HJJl Ȇ; |?Q~(?]e#f^ %IQcPeq :' m"@=b KGĄ_G>&3Dی::]AW`,Nt5}b`:OR/dbBc&bIVlwDn>BR+7y&Wr( S1& CZ%4͍NL)7:PW4TOܭg piڏD 2w}2~YOe9i +ȠWYh8{բG֫z%rŬ=\c„br,o&Ha%(=Ƣ&n! 'dҒS K-xvplLٹwW@囉M`UA$ňw^]SF# ,)jLM=4x$!e<;jmVTgN)!@l~ŤtegC/@^LˇHU͉궗d+}`J3[m[Da_ZF Y549Cil>G'ں"sM`?sluDՏ" &/3$'艶),c߆ vhT4,'>jM@$h[3m[Kѵ 娨;Ç-3W;m >RmԪp2ʺ!׍\Pl/h _/Al !<#&F#`` l%UkSQ~&t* yx2 ^(yKz3~+x$ھ˚㎠C>mv@][L{o'E8lK9I,GAXC6Rh-ࢸ_$-m>XLLk3Ss`ORQjOI(&DFblv igddLj*}/>g_<[aY;̮~ )UE?LUŐCŢA/.NvGgե `2m@z6NuJT[E!P1`X0"ka"wl&kpΚ@OJuEr|^*)P<@z40z*sAG_pme)Т#)5<ߢQ߸Zf'JMǓFM߬ya-$+Up<2鍯ִl('e 깇xrhP' sXl 4d7Ŭ?2s~ ɏ X1Ƒ{92T(Hi73inUI@>\GɪX; gC{mwJ5k:<bCahbS.AYY$Kٔ83rȺVMx yy1 "3aI\?UpM dH:=֡3Lf Jpqs)o"υ(9A֎Y7_t ?+Q}<Ӵl3 +V7|=sY&{Vd^HKt-r0aZ2sya[:G qFP>b&?[R+tp?u u"b#3ȅA4h{'<*OPy\N|6/$qu*6iZؕuf pn J 5 AbCWiHEC=o}5Ey¸V>}Iİ94/ `n%\jVwBnTrKҩ_W6h=~_{ 2*~Ϙh*;0}}$~#֎ӾUZ0K^mscD)zq'X H0!?#'"JhV`k&m7+,CMQrڗD#^ ]샪:H8>羊' =,b4el^O)YB -yH|2Jq<Ŗ/&DgԼg$YOdU! 
z^)v9_*ݙOZJY1OrƬY.&~G.XDvPBHKib\K119 Khٰ-}F//t\}*̙D:uljŲ%i]<.LL$dS@;UЉILW=CZt%uOzճt^*i.u .KȔăљ2Hךۨ BTkRF}5Sm1C(Du$]B+!y1FyC  *-&,LZE uQ_wfe~Y5L9Zڷ{ ,_/gtxL S'kɕoY{WmE{\y/8,dp:^](RdY Ή'9?]Uf/hAҸF:''W1y=Ns1P mtr]v|2N]nsR덥x{"8TO)_\!j+b 8> stream xڭcx%\&vvlvl;8sǶmv:ضm[dy9s:7fΏꮺjݵjS(0%lXy 6F.Nv6 vr @3igCk_Bhlag+f hMb@c++ jghaf VS֠O?.#@F:Y(pZmR_gs ) TPHmPt10Ym4S;Gc;[Zsb%08-ݍ@{ ';pX[S_ݿ waKhdha UQLu::/ 3ibgOKE -l@wr&Nֆs%wW.NfY=hfhb trK>[ hm7fpL̋v\ fo&v)ߔ;DoEyo=Wj kkC=h mw @ϲ6tp,?6wN!lkW!vF-$,܁&SC뿗/ W f[[!nIIRNMDmw*U=쁀ICsx1ppX9>ƿϑ'X,ohffdfft ?slhkw;4[[3 Lpe/kP-.K4x allX:8=ŲI^nQvp2!f\hD{]/@hq0M*)땾CNw|s~'s- ckVىVWt~Atjpldx.77$Օ =Å,4N ?uG}дHSÚy#(? =;@,8尟 eNF-cj*Ki;| l-Y*D\O0 rV' f{bkeJlũN vNJGT`ZSy&佞tQQ1e8Q\Zi}^o);э^>7(]=r\S(fts"݅S1RvG+ptsSSI}!PΈ}.*`Tـ04 #V> 72<*mI^*' CC$LK{nT롏 4R,jfDl~QC l}i&wQ> A6[k|b#:w&LidCܢZãHVg=Xq={C? TFjA$ük~X_ϊ+r:p8rٔ|kTXdQYJ8/vr3.Q#5X8=<>ⴙ4y<RV1iʶ"1[Йl/bd͓:~{аhtx@LIѼ/7gYBbE<^Vv`)RIW 4I% KCɐ+2\ Ʋ-nA,ǗA0dY; t/ HLER:h)3zzKҔ3]`I %L6fdB'&o&=#xSOVC%CU,Q(K5E^&P<xylX`[x9EpSV+kA].bts_3GCf/Njۂ⦃x"=^ܺPd}ٜzjiaEV"O s%mo vD^G ]dqoD]Vu<ɜ9YSyfW_ͬbc֓՚EmUAָ-gG7?$/VI ̯)sr>}eSU/+l2$W;N!!&]Ҹv(|nS^5#yC7vdP.F@ D;b ;!(ڴo[`00/'-Pw𾔋Ի)6g5S7O%AR<|{oat7kѩKbsIO8ucEcٜ,)p\fϮ"|ЫrVvwkN{'$ ^ꂰ9qW.\ ;[59zJH[> b{l^oܦEC!#X[~;%¿ٴ8s+sa#YD I-os1{S k=-L -x}|;pwx?%_Wˈ o(Q ѤE`K[yGzs #1מ83d(*9%O|Xuv .ä'BEe+"WʜS!PE'b`ҁ[Fq]do&PZYq\FlPL ~Yf=0My}m:,ghw;ݣ/?ꏓ/y)t, OE1ռVʃ)jX@r0דd|+q A[Yvex''qcMH+5Q $~8|3)SQۨ. 
l=.G_r6 rI9D}Θ:erȼ)*S[$l'4Q$b}H眐gӴ$)d5n>UuDf.UOhhİ:GsQWf2O9|dg4;m&Ɲ4Xp ؊a[o"< n^M@e9mu% (UnÚdeaX r}|'>lW冭́bDeEF9~mTzf*# m@$oN^O|Tvnne@՟b7!3no94OE6Nه(_E˹eEުX;}nv[% U?M4騨$aLE߹ý>`+GT<,*gC8{BRDw8zB6Qjw=7FϫCc?奶h>V\DcAylyx ެirg{nj7'm%yi"Wbb/!*2/Q#-$?`&ٝ٬V:̑_$їk.}n:&ǡ80IHh^ތv{b`JIN'S"PԍJ/hծ|Dx?8g=yA2a6+!v)cs׍P#tqucuN<-O[f7Z%\?O&sڢo`h< AH"P]K8L VP$=v č1MaS\gd[Wvp|=E\_݈ L~Mw]iyB}D{GXWZf0÷!6js^ i bVuJ?#bNe($FfJ2"Yzj;2{bJGhhrw!ijboZ)07/ 2YHUu ?N y'qTorb=ѡ8D?]6+bTc7fY<2~\nvN-D=-qħy&R [tP #-9mfH* )\Qɴ3H<E[Ͱ'h`) u>lPtAwJE<}!wcU: :q >&\ wpJG(՘5[bfu=S_8dľqeU:3᪾il 0`JSZMDs~ӎK°c \k`NGZuْX9)Nq dT7(lŸ`N'̀ ڜ81abis9'bh)ux7& v>*4\j12ڌ]qa NVtFP@܁dQlFcԹ**q9'whsg7"N2 Jzc/!WO]|/2ȓqt5]hY>i9 i@ v[ı-M׹;~Μq? f ׯLvoI||LCb0;t]6@#qXV^,<!IW]S.s XWcjg(I7'(9FTi r=ϦY#/Ot$cF*"SLOuƅyZ1aUTR.xdr@1rJ skP&㧆Z0~|Þ$@~I>@o =?:0-7( - F1+m9yCn(q#)-V 7PXX+0Bk.W, ěv\KrJ7ps#?Q遝Ƅ Ag?|l|{d:t_Z-kDx2;{A;SNn`%.}kƥՎǸ-a`742 rꄕlv h9OxJ`KƤ&`#TPWd|5y':"Ceɭ䣵9aV7N..&xd̓pxG;iS!zﯮ4!e&f}Pm-Tݔ_<)"+qy+k&OdZ85}jǙp]bs)IMUh>OZވu+|!KvaJVUkQZm won21}@"GMU6걣ot%>D~cAMuZD`ۉ[bx uZ[';j&g9ϪJ@ܥ_.g|7åoFOrD˥F#.0 aMҴ*8 _>kZ:P~ }" MK _r0fΦV?pH&Bs$bY$w 6R@C{,xՆt𧀭eE~}V[ YQGp-Qf bN[ԬÛ^[iA3Q㐑j"1VBnV*Nq~/&Q9_WEc3)3$yٰ\P$VȚ?>鞫-nBG0- dd=2fmr8xd8!nv_ Ё, `e)\>Xvxe)Ǖ.Rc;$Y&H 4N/tnbc8FT f!ŌQJGq]EMhR=OBt(irbj!3K\TUyn#od;Z,>\FX9~`3u?\Fp='c*i9j7t jAC-ZT  _N Hs?XhaKF@$ǿb7hG/;t(9N Vȍ|ˋ1sl # hC~>Q{s+T)h!4XdX1c6?5G4W~Q]Ah@r-n>_s[6~$^Rsw8(Mz (c"L Md ,/:$d$JbGΈn:67x;$ ~H?U]jD56$};g :Pn\3%5$"n!6E*Je vLB㑥7o,0\|m_PmLS&)"qq8ht??k<5ΌXt MKKȽAps6ua 9Z14%y ^]0'I=CfpcPc^ȳv[u%,ta+P(8hS$h kiNɼ_{"Ф>!#6w C nb:/|q2ᔉA_Q3]璛.QF-k.VXPE@z/a"^C> 9DdiZ5^K1;c`N"?rp\_^'&H]̹gTwgk~}~ \ gl'>^C@jI5*$Gt% f"Ш9%mGUO'FfSHe QNa}4F18@6uh7E2RFcԕc+qz}B$YN&8t ڪ'M)SwiVD9a}oy6)oCDKDۺ?18[ .\?8}CيV DoP!uЎ͉c>]wXaBD6t .i R[o 2^{g٦V}Hl,պ*;A2I92I`cԒݽ%:o8њ,.v*-r9gܾӊM^Cu)ruG7.d,CU6C aR%è2: 4L]b}\xk1XrZ6Fk`>jlS mviVץ@"/"{Ŧ peة;nU=^*v sʫ6^PXڹjF<]s%XŐxyls =s֔DY4gP |K8})v6q'Ѓkwyߵ )\_ #Kp,G# ( PG'y\bO$KqB)7Pc?Ld@2'?Oo M`UU+sYl0R`K !)n.5#q#G]hB+Aw}/Zs1:AwM78x%9FVXjAptUÔl}[A\6BdKiwq}k&g +0;^2`KgǓ, }ъOeU>>Q'Յ$?#cG=62Srg 
MX=PS95!d~] 1 Dg7x2Rv;m| b1& !`#rnskFZCՄ]UzQ~ְQ=Δ6sarX AC|\QޘUI+D혬D{GlIЂ:ѡO_Z.kCĺfe Њt(D:M \8g~py(U!WV~dYRry'a|7YZ'BVWfN#t.2kW]Ϡ8EΓ&9`@ru?wl2ɲ UxҜ5T5kA7c[I^/.'˓iyJPT##OesòU{>O,\v}H >8Uui0Kixħɂ z6%3+M? ݓseqN¿t_(JyOϕmU y'CQu,׋6:ghY5$Q.mTi ožFrlA"9RI̘$@C~ Ex2%Kt=[mUme8#^`KBncսL#0_iHC!,Vfz)Zg ^UFҖ%呝1If4}Qjb1J`;/6mƨ)N$l@DeKvX, )zwazBw7FM4>3 %TwH\֑o"d^Ib8dͧ?(A=k!|܇B׫ YX)՝+0dE0P|&iGtu daoo6<(H<鷰" aB%`?J܀OKwoh{/^T+ VW[K8maִ+qXCbE67E"Aw7u}x`IOXmXفE/c M)oJҨa%{D IM 7rmwWs=֠E[ظ sQ)CSVhi[k+qaJiXj`켴D/d})MNL uQ'_Bg_) QZZy)8u3+lT-A&hNRPD™*PF72ӓqr.RĠ眏v`cH+Nຸ\MIv|/9>rEPqT/Y sZ (tNkFiTRP]x8+ DK<흱ԕIFxȜc@ iIF,o2u7(AQ',5t9p炚Nܑfdm=lCB"C TQq\X X)%1U>8mL/W` LTO7ApwB_6?|b>q|99;Pd͜Vbm?&xe.iegНt#"4D;U>jZOdc:֪94B)|6騤 z3hnf"*5 BW*{f1&Zb[+r͔}bX9\aeb#'Isb:k>̫.dMO/G^/NZl_4W=F/_ڐ"苉_IJ " DK9:J[g$!7W|^ցfk01lfxKLO&h+ؚ(rWO ;h XV&ߟWG4&"\QLk_fLyDRYPP\',1tyE:b5{a1&&F{PCPN6CGsY2e+(l@ꇼ=8! *ʵ 6KmCH9S{Qx0%ɤGthnP\Ɍz3R\HTߑI Sx<"S8bI\f֥8՚(Bñ"X(ߢ73KŹx}@ fd;/>3 fBsu06G{Ddp!7mΩ) =o@›߄K-I|4;e+B^MpovE1z:[3+R|-'ƶ_LRQiJ αF1>二DxiD L 25S?|{ =Kk5pBRS/ts]敛y6ne4[~1paؽ_60]Wbn`|.0|ߧl5~@ҏ#[5/TóDgYU}_R 0N@[(I7A,S"lT!n:S9J>Xp0| $ݏZT^r1c=L˜͏5t/ir{BU?>IUkjV;MqПT0dƗ;;J#1;1,+7Lb 8ؖtM,ds_ҖN-+p8%ѻlړ>\ECI~ ϟxtZ;VW9P8|F^U2,{^1-reh` 3r-Lg'}(;Hs*%%;]bdd=u!`CTYe<ly5UN(a= 0IO宻G1˶~˃|tShϲ2 X&eQ[l;3glp[ƘWGeq ʎ3|}9Y4Ec@A7تU%̐EuB njm{X[x:DZ-BʏR{0?rlaS+aW庮k)5:q\zN+kM1{F{6ڴ4CQd8xηw5-0{\ɡvt'T]`wZ8NvC-LFL@`oxͤ/IL0(1F3z쥈sz L|#rqD[ݭU@z (B#, 5I_ w2%3n,ȧtߺh C0L]F6czFǍUP JBa,VIgf"u!_-91BdvƾŧMSnHح(5Npa?w endstream endobj 2350 0 obj << /Length 900 /Filter /FlateDecode >> stream xmUMo:W5?$R. d9M eCkmCp;;w~>|3E_?O]5߶w]Occ]=~?}Oyh9%?۹׬B|Ɯ>);vw%g43>\ 6 EJ78 1{~`W(-;]%=xe_,b+-O;q\L}UI--=BKE1p[! Mߊyu>.N5K)Wb٬8i[_uʕMzQ)V(Txޢjy!Z2P="Zd0\ÃGR\).2*Шa!U,H`+j.5Nα@VK-x%3%AYӀzΚ>kP#5m0Woþj.ZT$X/)n)#Wo(oRZ $Kp4Z-b\1ܰJ P"GXQi/8k^Zq:Zs9dB )sL-7xJ`aɽ)f$1 dъcCZC<73JgznHȰYɚTa,_-O87}KԴܗLloK+gJ.GZyVc48Wt]:P~`rZq.n1] S/Pu7Ue:?&?!d&1yHn5)yғBx#1ޞ]Go׏M?X endstream endobj 2351 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTMk0WhFG*! 
miʲVZCcYy#9햅ļ{3񸟤e&Oo]&C]]Mq>zwt߉Ǯ)n.pCx?nڽVgx=itO"i [\l\WM}'ԭ̚t4pXeȉeU oq yM\-CnCW_Ey}wP dZz891euB)] W-\v\]~[S!8&+Zce"'2Ɍ5I@|"B2AQhSlLء28a}ɑFq5ҍnnbfǮCG= Wܢe$g;A,:sx l=NOTƘ$0_س/vЧQ%~Zx pX2]$^qnaK??q FqMyc0=) &l(mi,3|d &\c ]͹&ӈ9w{d-tx\ \cΜekqLJs?<@>qhx .׷8wl~1V<*m"mmDa endstream endobj 2352 0 obj << /Length 664 /Filter /FlateDecode >> stream xmTMo0WxvB+8l[jWHL7RI;onDo3ތ?n~<&Y$ŝK_IsE77E[^N\5sߖ;7|[lzmS_*7F?h3΃;mc-bB`ew\_7oK׽;(2Z.ETz}ܟ~o9V^MVK7-\f\S}[S!pcSs|TXo1/ȡ aeuC> stream xmTMo0WxvB+8l[+ML7RI;onDo3ތ?n~<&YվI|/ŋ;t硋nn\3<:Wj\=?-wn6pGۦ|Tnʽgxté7~qzxKlqrnX7UޞMjuSAxHiQ,'wͱ 1}hW7q{UEݥ-rG*F>NNL7u]tNhWS;wE )b,#TTHy=)9>*QKr7P:MȡQ^s$LD6aȑ*s.$S56`>ƄmÁ#TL 5kd}WXssc*zRh/#? bE$L|ږ8^y>eSQc̯bV̯cNa'_OAJ195kd3EH@8ܰ%~As*=F 0`{RLPh33Y$LƹǬ oqMsȼ tx\ \cΜ-eksL ?"@>qhx ׷=l~1֍>*]!MBa endstream endobj 2354 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTn0C6U@"mTt@;olvR3ތm~<&YվI|+œ;t羋<]3;Wj|{}[ mmᆂMv{Kt=c_~B?zxoBS6wBJ)X7UaMuSxHiQV,4$O;nC-bD/OCnC_n^ѻs׽9X2Z.ET~{~ʶrn_~߼h!R,6ew*ؔb%k e+Kӄ$a"1x*s.$S56P>Ƅm„A Fs 5577vرϾ+uaя6R:!,əCxg+ѧy*JcL|*m:fvuiWUꧏɩ\g%<Ϛ"sÖ0_:3x0kjhyIYx0aCnOg3$cx0<<v5O#ܵu7A 6*sZ ZcΜ-ܠeYksL ?"@>qh|tngk;dGGM@c endstream endobj 2355 0 obj << /Length 665 /Filter /FlateDecode >> stream xmTn0C6U@"mTt@;olvR3ތm~<&YվI|+œ;t羋<]3;Wj|{}[ mmᆂMv{Kt=cߚ~B?zxoBS6wBJ)X7UaMuSxHiQV,4$O;nC-bD/OCnC_n^ѻs׽9X2Z.ET~{~ʶrn_~߼h!R,6ew*ؔb%k e+Kӄ$a"1x*s.$S56P>Ƅm„A Fs 5577vرϾ+uaя6R:!,əCxg+ѧy*JcL|*m:fvuiWUꧏɩ\g%<Ϛ"sÖ0_:3x0kjhyIYx0aCnOg3$cx0<<v5O#ܵu7A 6*sZ ZcΜ-ܠeYksL ?"@>qh|tngk;dGGMc endstream endobj 2356 0 obj << /Length 799 /Filter /FlateDecode >> stream xuUn@+HɁkc{!5&Q^ үル!zya/W7/~jyld}{9}N=C'u\W;oέO*k`~?''3Ɖt3\;WS]Q?SVk ]{9FSѤoG^ 32j$WC0h޼O~wC4Sy<&>U]Rn·ÛB~,{_=ڰfhm_}4zu|sH]Wb MLD!E!B FAC\dQQ(%T<#h^QqKƊL0cF F͌a._Q mPG9'+X38)+ι7\'~5:r%%Β뤧$1$܋a %aN*Atg&W̡`92/X[B|fAlI)dKdgI$[d$[H$[hv-|9~ddK%[w-t--d ~)BO)Rd dK|ɖNK)K)++Ζ]Rd]Oz͜|x8?<ᤥNO]?p@}_:h? 
endstream endobj 2357 0 obj << /Length 550 /Filter /FlateDecode >> stream xmSˎ0+$1$#8h@Bܰp`~ +8*=SJ]sCM&ESݮ`w z\ħmbo'ޚr028~}uHXz_z.XA_`1o"xR:bct\$7҈٘TmH@ ]W0ywznͩV+1r]oś}X 6g1ͭnm{!^ ' bނP48YhC`୤\UY=0ZĎiơ 7([4r;"A"e"qDgs"2dK$#В%#KDDNs5&]J[/G endstream endobj 2321 0 obj << /Type /ObjStm /N 100 /First 966 /Length 3405 /Filter /FlateDecode >> stream xZYoF~_я/`Q-JG4ь4coU${C=@XMvUWR0ˆBPÙ##&X"B)X %HVXq( ` PUHm4RHk=R(e[* W5޳z&Vg+9`nqlGNIzI>W0~vDuuYsѣbZ# ~\1%x~1]\.d俌i)]}6^3TWo[縨P]<8D14Ⱥ@cQC'WpcԑV"vp%xFy]G<=5CYLi4A? xCs9Ȃyugs^YG]ې#_ima<uE/| ޡ5 v]Miyk]*3Bԋ_cMEx45'}w?<uv" ;el{UW9넘p]6 s1G_\X/Oًw-VqE0lL{@-d*e'o$Nׯb0ȁ ``75;Ϻ10^@gP63Tаgp¶ڮS7mey"ՊmR>yzzhC Ⱥ#͋3{ [t ?ߙCdtZ"ԟkS3ޕfNU۷ϟ鴚9 p 6fFqr7ë^m^/ -0P6g|aaB &usvS%%mF8{ Qqg?H҉”F"sL?{?P~h̥>L[xVBLnc($ g yb0XY*g q.2,2 /}> /}> +}6 +}"̋"`0\,Xx߶Wߢ޶WޢݶWݢܶWܢ۶WۢڶWڢжuCe endstream endobj 2363 0 obj << /Type /ObjStm /N 100 /First 990 /Length 4644 /Filter /FlateDecode >> stream xڅ\M不4eֈd=G8޽y|Pgr;&Sݵ?T"$_ H<< HǦmmi|(Rtnllw5mImn tmm[趉77ɦFFZ7J0FPZ+|r`uw)4*eT#5Z;?Fh]'HO1.(̄FMK_$|JbX>&C9eF431 %E=|؈1U%Ƈ=жƃ%T4APAMI Q"0AJMLm5Dt d'͊ #HZ aJڠJd0J!6 z2 ١}i0e`ʕG}`MH#5kGn`(d¤+1F- `8 `Px tL lt؉` 8SkF!r(52-ebpBgx4-)5gaa@'i0c` W ּ ּŸ0q{u#QmZh> ^L 7<)U?~vþoH|4鿺KpeQpa* _@nHԲ|R\\y>raȮ@0j 4+vlvv"ۮm9V NdU ' dlJd[l3mV֜l#'*ٚmD^%[s@Vd+NVjlj"[͹V׏$|g0 ,kw{.6oOm{χuw|'ܼC;q$ͅ4钁|/n)1Ǎ pM7*on1t>5X̄ swån?ϛ ~m~( @=^'hJ׭ߺ xbsП 3*u<;RTG0E3Ejk,Ds_Mzszt7`,}dr#TesRauJn|}6Mv6(8ÜO o#:/1x.'MRѨcC_hL({Ro/!8f)ܫq 6=0f B;ZqN>?\ M1b,6Q`llkC7lKdQHelWP!۩Qٟ= /W]#d|E\pd_ඡ96k ochƮ)NC[qMX> HH B1tBd߷\?7#By?6fWci)hG <3f!-CUJq 91^[ vGkB1v;$-(|)EA@Rѷ=)G\X*I]|:o팣 BܪJ1!m_q-Pp@3z4U7a=7vyoC{O-yqߟ6Dx5W1BJn" 69\~n3"؞ bDK}Dg6g}ʾl3mluMz&WqKJH ~[ck P[AH=<-/?xcM/6bdZ_{}G i-HӠ~w"{R ܲQi IMXa@K]Eg :tZ(l/{ۿv=tHƯ)'J*&O]j=1d1A8ٸ1dB[vNyXv=# 9\vlK1| n}[b}K&H,wP);ABrWI-?^g/Mԋ~s!Z2^︘xSR"|Qkuo, 9oN:n.ͶC%F^p6 }o"ԥ۵be8Xt-@76wyPxz0J|hTD:}(Ԃ٢uVF\2d{ANC}9}χ"[QA(]=*kM`ˣnEǗ[ʚ(4o}OnMa~j>zJYyZyKyeW҄f4S錋M9~yXZ2,Q{>LjU~U?|Ʊ)z^TaC+$/Nyei&~RinC[GYRN_5hCbGCAvg,_Cmj)wZleoxa|&(Bߧw/</gӭx(Uӕ¯a 4|qOiZZ r4#>8<:c8sOB[8Vӷ˩֮k,Ū[JQq%RH@žct/bWW 
~4lmռ`l v`\5Xi2_sXB}*X O_K<?b1b-I _|KX/)1CqXG]E!VeS@j# Uc ~cX: ~ |Ӂ!?2kt"ӱ#eEz_sNU*tHhHB[DžփxeR6P\1E}w > endobj 2448 0 obj << /Type /ObjStm /N 75 /First 760 /Length 2912 /Filter /FlateDecode >> stream xڍZMc GUa oH`5RvG0z;{Ypkzd}uZmY\k_Rc R -y[A]RjMTZ.I/%$365<7'cbG㏨SQYrW_o)*K)>.E|:.&2tJחğuܡhXzjZYj/"}H)."3[:tXTd);=+$_E-*,:du-0@t1ul1-ckyu}]gz^[Ҭ.=di=AfKOcˊ_ҫku]8r?tUdK77Y棺 {c,^plZ]X7KZPZv)a+هe {]}lxvhKIЖrbC[*5 mEJ~phKFЖO- Cѻo{݇=ߜO-&./_ \ Wa?޽Ėm'x`4_n\K5rK銧 DO OD{u1ash/:8o9 t[0ᜧrત ʫoD#%X $HFm'FׂXoDRZ"96؈oo,NF|&9dT]I$/v/Ƅ TEs鐏P 9 u0ܲ A~!ߢ!/$` +_q (t2(DmQrat\'in ƕGJWq*`\ C;\ml;aʍBr칪SR0Qy#c+8ot*8oBrp\p^np˶f0ygsygs9;Zqiq qZ] q(rP*w"\bmuQbS 1rr] 394BA޳b 煎sNs<(ݦ:M? sX 6p@NBMㅁs{asN 2p>@G6iќs: j=ź(T'{{-4@YĬĬP+Vv]+ m1T= *ʞf- b- b-tP*:}~i V: 0v)σrh\#ɓTor2K= Lo]>h'9ҺJ{(+bۧ m'``}hlN ]J<݊4 ~ JީRr*Eۅ*,N0,G[SW!'x^)(u%pu M& P`jS wε8J)FAST} 8ԣTjx1'| jEs! jE2w>a%)q2^zd!J~RVuwV*sPAz7HՁo Rʘ/{D 21{g?Z.QyG moiUmmVyG~Ra{=Jc?<@>xڿ?~2ഀNn{ŝXOY @UT=xaXהwrxwV?~M_y\Yx !OF}?F@ytܽ,Ϣ_׏oOs ] /Length 5906 /Filter /FlateDecode >> stream x%Y$QvYY]UowW{]o+@#vifQ! hddQ$ <Dd,%`@i1˿̸wosn4F/i6&b bb5Ǡ%vIl8,{!Ts!,{+Ds7ͩqf`2Ś2 bqXb{iNJL,*X-6+@s {';Fz V,.u#l- [Nm 54͝k[b5wIO{aؔGp@̓pH{i8,YlNm6fw<Ů<'n<G~k3pVl+sp^K .]{y.]{y͈=Ӽ ךtSu!M%9fE3~XG3h y2|&Bf\ 2N#n+bw4ƾ, }oi#bqab[nh>[DŽ`_/Bkb\k]4z Nm` iFyF1 0M}L,嗁>X!6'v^`4_ `}V`3't`։=;iF}a>!} 0`Wj؃ӱ`=l8lă}`;80`H88 qjqq1qqq_ŝ[>D BЗˀ;`OvŸۮ#y|1|:1ۚҦ#xmk*I(܆'&G6 +`20X{`4cp`ysxF60f50wv` σ/7 8x-xMg^j,862Xnj 0O&9@.!& WT`2fEj "`Hvo 1_jƩ9gVjXka 6djln\:Xq&O~-6ƾqV^}SxƏ='7C 8{S!8 Gc:LI8 spƙ_"\+p;6Akބ[pb݅{pԸ34f<'P?RûOS9W |6S=DlԂZ [ SgeHb3QLYAM{LYM`Y6,ƦBKKM pƇa,C<%YBpN8\tnKwu7.N?ĝ䵩`ʝz 1qIZ}W׾0  7+4ot(omAáqotx85GG7:u&7:[aIjGxtx XsD'ѱ jNrg[jEX]wXC;LƯ8wNGtN;4xvاC};R~!:lѱޝ%mJw uƷEF=ywhމ#N6.nퟎ^▐V鼂xbӼC; |rw.uUӼ6k!5zo r惚5 '撚Kj.-jި9戚K\grs!t}?5R+2ިy6= Rt1 R3H 5\X`C~{q_< fX͎t+[ `9X NJXa 0ҏ6z[~}2mmwV8֢`KJq v `?p"?ȻO,z`JQt/ sIg95n;z"܃,߉.CL2:܀;Wfݎ.W}sDO Kҽk}^ZՏ2юJ37l9}IM;K\>_+]1MiLg>sEp&/8lя[|XHZZ;: NJ~sIK>G9ҟ;rS-!۔/g{3 SɟXN%șA a,E0%PӑLH_'`56FMo-t<w mz7쁽  i8 G(u)T18'धfBTpHݖiOanl4O⑮Y8! 
ɜ_s \U)}nĴ9636}Tڦ֦+MK`&YN?MCm9(MM&Vl5ȎUFWk˿i)F-|K2ߒ̷"S_ҿ+o)d-}KNܚ_|&d$!KI[КhujNOԗ$-Ԝ)|;$0ԷI!!͇4|'KRsߌØa`RzhbH!Ath-f `15w\CJ)=!iL!U)7$pj09͇4|H!͇4o_È<$C"o/:0TRuh` 5gvtE![ HN3z萖èCw"5/#28!rH;dSؚ >oD);A35i4\ ɹ J͗ V0MAôF;`Cje+l?ޙfy|>8c7wp N2,g?Fg .E7\IͿ8܀p n ><_cgѲ'O)< ^x `bXlKLͯ-z>~(5'UAXjW >EmS{GXTaG^葻G{|;߼CN׎]{;D.j}QUPsQf@1.a=۫`;^pDev9؁nz(nz /RxzKEƠ`{mOm„`!,Hw/8"6pt{hmXKS^i6 fVͰ5` lf vŠ}qz`3l Cnؕݰ>!8g`jo4 .E \kp Υw~5z_܀p=!S퇢Gf Gxg{庋|]@7d4vp{غHc ~%bK]vIۍOvcO$cݍ@.-ҲK.-;] v= G@ժVUꞀ&-8Iwiޥy7f;1QߎYyE X= ]f2CUse4vwKVJUҽӏ@nx]钶{-m~K_.[t٢]wu;b vt;э3n\exct ڀMX`CRXaLJXa )XW7& [`+lv. {`/p!p18'$p98\p . :܀p Z~܅{pCGf \kpnM܅{pCGf L_Oq$?8.f8^:Γ'F LƜɝ3g9Ufƴl;wg2,ޖgL3{&dL3ST&xZ"WU02Ly]6StۤϜxijx5 9 LY$;6K2&,^6xvavFf^ʼ))f7VY5̎g/};"g3;˙=Bw/?{M-y9ިX/cb$qBV +ຂ +ൂ BX !H@R |A /_ |A /_ |A /_ |A /_ |A /_ |A /_ |A /__nH endstream endobj startxref 343543 %%EOF dplyr/tests/0000755000176200001440000000000014525507062012554 5ustar liggesusersdplyr/tests/testthat/0000755000176200001440000000000014525714672014423 5ustar liggesusersdplyr/tests/testthat/test-deprec-funs.R0000644000176200001440000000646114406402754017736 0ustar liggesuserstest_that("fun_list is merged with new args", { withr::local_options(lifecycle_verbosity = "quiet") funs <- funs(fn = bar) funs <- as_fun_list(funs, env(), baz = "baz") expect_identical(funs$fn, quo(bar(., baz = "baz"))) }) test_that("funs() works with namespaced calls", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(summarise_all(mtcars, funs(base::mean(.))), summarise_all(mtcars, funs(mean(.)))) expect_identical(summarise_all(mtcars, funs(base::mean)), summarise_all(mtcars, funs(mean(.)))) }) test_that("funs() found in local environment", { withr::local_options(lifecycle_verbosity = "quiet") f <- function(x) 1 df <- data.frame(x = c(2:10, 1000)) out <- summarise_all(df, funs(f = f, mean = mean, median = median)) expect_equal(out, data.frame(f = 1, mean = 105.4, median = 6.5)) }) test_that("funs() accepts quoted functions", { 
withr::local_options(lifecycle_verbosity = "quiet") expect_identical(funs(mean), funs("mean")) }) test_that("funs() accepts unquoted functions", { withr::local_options(lifecycle_verbosity = "quiet") funs <- funs(fn = !!mean) expect_identical(funs$fn, new_quosure(call2(base::mean, quote(.)))) }) test_that("funs() accepts quoted calls", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(funs(mean), funs(mean(.))) }) test_that("funs() can be merged with new arguments", { withr::local_options(lifecycle_verbosity = "quiet") fns <- funs(foo(.)) expect_identical(as_fun_list(fns, current_env(), foo = 1L), funs(foo(., foo = 1L))) }) enfun <- function(.funs, ...) { as_fun_list(.funs, caller_env(), ...) } test_that("can enfun() literal functions", { res <- enfun(identity(mean)) expect_equal(length(res), 1L) expect_identical(res[[1L]], mean) }) test_that("can enfun() named functions by expression", { res <- enfun(mean) expect_equal(length(res), 1L) expect_identical(res[[1L]], mean) }) test_that("local objects are not treated as symbols", { withr::local_options(lifecycle_verbosity = "quiet") mean <- funs(my_mean(.)) expect_identical(enfun(mean), mean) }) test_that("can enfun() character vectors", { res <- enfun(c("min", "max")) expect_equal(length(res), 2L) expect_equal(res[[1]], min) expect_equal(res[[2]], max) }) test_that("can enfun() purrr-style lambdas", { my_mean <- as_function(~ mean(.x)) res <- enfun(~ mean(.x)) expect_equal(length(res), 1L) expect_type(res[[1]], "closure") }) test_that("funs_ works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( funs(mean), funs_(list(~ mean)) ) expect_equal( funs_(list("mean")), funs_(list(`environment<-`(~ mean, baseenv()))), ignore_formula_env = TRUE ) expect_equal( funs(mean(.)), funs_(list(~ mean(.))) ) }) test_that("as_fun_list() auto names chr vectors (4307)", { df <- data.frame(x = 1:10) expect_named( summarise_at(df, "x", c("mean", "sum")), c("mean", "sum") ) }) test_that("funs() is 
deprecated", { expect_snapshot(funs(fn = bar)) }) # Errors ------------------------------------------------------------------ test_that("funs() give meaningful error messages", { withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(funs(function(si) { mp[si] }))) (expect_error(funs(~ mp[.])) ) }) }) dplyr/tests/testthat/test-deprec-tibble.R0000644000176200001440000000030313663216626020216 0ustar liggesuserstest_that("add_rownames is deprecated", { expect_warning( res <- mtcars %>% add_rownames("Make&Model"), "deprecated" ) expect_equal(class(res), c("tbl_df", "tbl", "data.frame")) }) dplyr/tests/testthat/test-colwise-distinct.R0000644000176200001440000000165214266276767021025 0ustar liggesuserstest_that("scoped distinct is identical to manual distinct", { df <- tibble( x = rep(2:5, each=2), y = rep(2:3, each = 4), z = "a" ) expect_identical(distinct_all(df), distinct(df, x, y, z)) expect_identical(distinct_at(df, vars(x)), distinct(df, x)) expect_identical(distinct_if(df, is.integer), distinct(df, x, y)) }) test_that(".funs is applied to variables before getting distinct rows", { df <- tibble( x = rep(2:5, each=2), y = rep(2:3, each = 4) ) expect_identical(distinct_all(df, `-`), distinct(mutate_all(df,`-`), x, y)) }) test_that("scoped distinct applies to grouping variables (#3480)", { df <- tibble( g = rep(1:2, each = 4), x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2 ) out <- df[c(1, 3, 5, 8), ] expect_identical(distinct_all(df), out) expect_identical(distinct_at(df, vars(g, x, y)), out) expect_identical(distinct_if(df, is.numeric), out) }) dplyr/tests/testthat/test-grouped-df.R0000644000176200001440000001373714366556340017571 0ustar liggesuserstest_that("new_grouped_df can create alternative grouping structures (#3837)", { tbl <- new_grouped_df( tibble(x = rnorm(10)), groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE)) ) res <- summarise(tbl, x = mean(x)) expect_equal(nrow(res), 5L) }) 
# Tail of dplyr's tests/testthat/test-grouped-df.R followed by the head of
# tests/testthat/test-join-rows.R (a raw tar member header separates them —
# flagged below). Source newlines were lost in extraction; formatting here is
# reconstructed, code tokens are unchanged.

# new_grouped_df() must produce a tibble without rownames even when the
# `groups` tibble is built from arbitrary `.rows` lists (#4173).
test_that("new_grouped_df does not have rownames (#4173)", {
  tbl <- new_grouped_df(
    tibble(x = rnorm(10)),
    groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE))
  )
  expect_false(tibble::has_rownames(tbl))
})

test_that("[ method can remove grouping vars", {
  df <- tibble(x = 1, y = 2, z = 3)
  gf <- group_by(df, x, y)
  expect_equal(gf, gf)
  # selecting only `x` drops `y` from the grouping
  expect_equal(gf[1], group_by(df[1], x))
  # selecting neither grouping var returns a plain (ungrouped) tibble
  expect_equal(gf[3], df[3])
})

test_that("[ method reuses group_data() if possible", {
  df <- tibble(x = 1, y = 2, z = 3)
  gf <- group_by(df, x, y)
  # is_reference(): the groups attribute must be the SAME object, not a copy,
  # when the subset keeps all grouping columns
  expect_true(rlang::is_reference(group_data(gf), group_data(gf[1:2])))
  expect_true(rlang::is_reference(group_data(gf), group_data(gf[, 1:2])))
})

test_that("[ supports drop=TRUE (#3714)", {
  df <- tibble(x = 1, y = 2)
  gf <- group_by(df, x)
  # single column + drop = TRUE collapses to a bare vector
  expect_type(gf[, "y", drop = TRUE], "double")
  # multiple columns: drop = TRUE is a no-op, result stays a tibble
  expect_s3_class(gf[, c("x", "y"), drop = TRUE], "tbl_df")
})

test_that("$<-, [[<-, and [<- update grouping data if needed", {
  df <- tibble(x = 1, y = 2)
  gf <- group_by(df, x)
  # value has to be past the ellipsis in `$<-`(), hence the named `value =`
  # assigning to the grouping column regroups...
  expect_equal(group_data(`$<-`(gf, "x", value = 2))$x, 2)
  # ...assigning to a non-grouping column leaves group data untouched
  expect_equal(group_data(`$<-`(gf, "y", value = 2))$x, 1)
  expect_equal(group_data({gf2 <- gf; gf2[[1]] <- 3; gf2})$x, 3)
  expect_equal(group_data(`[<-`(gf, 1, "x", value = 4))$x, 4)
})

test_that("can remove grouping cols with subset assignment", {
  df <- tibble(x = 1, y = 2)
  gf1 <- gf2 <- gf3 <- group_by(df, x, y)
  # all three assignment forms must drop `x` from the group data too
  gf1$x <- NULL
  gf2[["x"]] <- NULL
  gf3[, "x"] <- NULL
  expect_named(group_data(gf1), c("y", ".rows"))
  expect_named(group_data(gf2), c("y", ".rows"))
  expect_named(group_data(gf3), c("y", ".rows"))
})

test_that("names<- updates grouping data", {
  df <- tibble(x = 1, y = 2, z = 3)
  gf <- group_by(df, x, y)
  # full rename and partial ([1]<-) rename both propagate to group_data()
  names(gf) <- c("z1", "z2", "z3")
  expect_named(group_data(gf), c("z1", "z2", ".rows"))
  names(gf)[1] <- c("Z1")
  expect_named(group_data(gf), c("Z1", "z2", ".rows"))
})

test_that("names<- doesn't modify group data if not necessary", {
  df <- tibble(x = 1, y = 2)
  gf1 <- gf2 <- group_by(df, x)
  expect_true(rlang::is_reference(group_data(gf1), group_data(gf2)))
  # renaming only the non-grouping column must keep sharing the groups object
  names(gf1) <- c("x", "Y")
  expect_true(rlang::is_reference(group_data(gf1), group_data(gf2)))
})

test_that("group order is maintained in grouped-df methods (#5040)", {
  gdf <- group_by(mtcars, cyl, am, vs)

  # zero-row subset
  x <- gdf[0,]
  expect_identical(group_vars(x), group_vars(gdf))

  # overwriting a grouping column via $<-, [<-, [[<- keeps cyl/am/vs order
  x <- gdf
  x$am <- 1
  expect_identical(group_vars(x), group_vars(gdf))

  x <- gdf
  x["am"] <- 1
  expect_identical(group_vars(x), group_vars(gdf))

  x <- gdf
  x[["am"]] <- 1
  expect_identical(group_vars(x), group_vars(gdf))

  # renaming a grouping column (column 9 of mtcars is `am`) keeps its position
  x <- gdf
  names <- names(x)
  names[9] <- "am2"
  names(x) <- names
  expect_identical(group_vars(x), group_vars(group_by(x, cyl, am2, vs)))
})

# validate ----------------------------------------------------------------

test_that("validate_grouped_df() gives useful errors", {
  # df1: `.rows` column replaced by a plain integer vector
  df1 <- group_by(tibble(x = 1:4, g = rep(1:2, each = 2)), g)
  groups <- attr(df1, "groups")
  groups[[2]] <- 1:2
  attr(df1, "groups") <- groups

  # df2: last column of the groups tibble not named `.rows`
  df2 <- group_by(tibble(x = 1:4, g = rep(1:2, each = 2)), g)
  groups <- attr(df2, "groups")
  names(groups) <- c("g", "not.rows")
  attr(df2, "groups") <- groups

  # df3: empty groups tibble
  df3 <- df2
  attr(df3, "groups") <- tibble()

  # df4: groups attribute is not a data frame at all
  df4 <- df3
  attr(df4, "groups") <- NA

  # df5: pre-0.8.0 style grouped_df carrying a `vars` attribute instead
  df5 <- tibble(x = 1:4, g = rep(1:2, each = 2))
  attr(df5, "vars") <- "g"
  attr(df5, "class") <- c("grouped_df", "tbl_df", "tbl", "data.frame")

  # df6-df8: structurally valid but with out-of-range `.rows` indices
  df6 <- new_grouped_df(
    tibble(x = 1:10),
    groups = tibble(".rows" := list(1:5, -1L))
  )

  df7 <- df6
  attr(df7, "groups")$.rows <- list(11L)

  df8 <- df6
  attr(df8, "groups")$.rows <- list(0L)

  df10 <- df6
  attr(df10, "groups") <- tibble()

  df11 <- df6
  attr(df11, "groups") <- NULL

  expect_snapshot({
    # Invalid `groups` attribute
    (expect_error(validate_grouped_df(df1)))
    (expect_error(group_data(df1)))
    (expect_error(validate_grouped_df(df2)))
    (expect_error(validate_grouped_df(df2)))
    (expect_error(validate_grouped_df(df3)))
    (expect_error(validate_grouped_df(df4)))

    # Older style grouped_df
    (expect_error(validate_grouped_df(df5)))

    # validate_grouped_df(check_bounds = TRUE): out-of-bounds `.rows` indices
    (expect_error(validate_grouped_df(df6, check_bounds = TRUE)))
    (expect_error(validate_grouped_df(df7, check_bounds = TRUE)))
    (expect_error(validate_grouped_df(df8, check_bounds = TRUE)))
    (expect_error(validate_grouped_df(df10)))
    (expect_error(validate_grouped_df(df11)))

    # new_grouped_df()
    (expect_error(
      new_grouped_df(
        tibble(x = 1:10),
        tibble(other = list(1:2))
      )
    ))
    (expect_error(new_grouped_df(10)))
  })
})

# compute_group ----------------------------------------------------------

test_that("helper gives meaningful error messages", {
  expect_snapshot({
    # unknown grouping variable / non-character `vars` argument
    (expect_error(grouped_df(data.frame(x = 1), "y", FALSE)))
    (expect_error(grouped_df(data.frame(x = 1), 1)))
  })
})

test_that("NA and NaN are in separate groups at the end", {
  df <- tibble(x = c(NA, NaN, NA, 1))
  result <- compute_groups(df, "x")
  # ordering: real values first, then NaN, then NA last
  expect_identical(result$x, c(1, NaN, NA))
})

test_that("groups are ordered in the C locale", {
  df <- tibble(x = c("a", "A", "Z", "b"))
  result <- compute_groups(df, "x")
  # C locale sorts all uppercase before lowercase
  expect_identical(result$x, c("A", "Z", "a", "b"))
})

test_that("using the global option `dplyr.legacy_locale` forces the system locale", {
  skip_if_not(has_collate_locale("en_US"), message = "Can't use 'en_US' locale")
  local_options(dplyr.legacy_locale = TRUE)
  withr::local_collate("en_US")

  df <- tibble(x = c("a", "A", "Z", "b"))
  result <- compute_groups(df, "x")
  # en_US collation is case-insensitive-ish: a < A < b < Z
  expect_identical(result$x, c("a", "A", "b", "Z"))
})

# NOTE(review): the line below is a raw tar archive member header left over
# from extraction, not R code — everything after it belongs to
# tests/testthat/test-join-rows.R. It should be removed when the files are
# split back apart.
dplyr/tests/testthat/test-join-rows.R0000644000176200001440000003067114525503021017441 0ustar liggesusers
test_that("`relationship` default behavior is correct", {
  # "warn-many-to-many" for equality joins
  expect_snapshot(out <- join_rows(c(1, 1), c(1, 1), condition = "=="))
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))

  # "none" for rolling joins; `expect_warning(..., NA)` asserts NO warning
  expect_warning(out <- join_rows(c(1, 2), c(1, 1), condition = ">=", filter = "max"), NA)
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))

  # If rolling joins warned on many-to-many relationships, it would be a little
  # hard to explain that the above example warns, but this wouldn't just because
  # we've removed `2` as a key from `x`:
  # `join_rows(1, c(1, 1), condition = ">=", filter = "max")`

  # "none" for inequality joins (and overlap joins)
  expect_warning(out <- join_rows(c(1, 2), c(0, 1), condition = ">="), NA)
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))

  # "none" for deprecated cross joins
  expect_warning(out <- join_rows(c(1, 1), c(1, 1), cross = TRUE), NA)
  expect_equal(out$x, c(1L, 1L, 2L, 2L))
  expect_equal(out$y, c(1L, 2L, 1L, 2L))
})

test_that("`multiple` first/last/any works correctly", {
  out <- join_rows(c(1, 1), c(1, 1), multiple = "first")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(1L, 1L))

  out <- join_rows(c(1, 1), c(1, 1), multiple = "last")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(2L, 2L))

  # "any" may pick either match, so only membership is asserted
  out <- join_rows(c(1, 1), c(1, 1), multiple = "any")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y %in% c(1L, 2L), c(TRUE, TRUE))
})

test_that("inner join only outputs matching keys", {
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "inner")
  expect_equal(out$x, 2L)
  expect_equal(out$y, 3L)

  out <- join_rows(c(2, 1), c(3, 4, 1), type = "inner", condition = ">")
  expect_equal(out$x, 1L)
  expect_equal(out$y, 3L)
})

test_that("left join contains all keys from x", {
  # unmatched x keys appear with `NA` on the y side
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "left")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(NA, 3L))

  out <- join_rows(c(2, 1), c(3, 4, 1), type = "left", condition = ">")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(3L, NA))
})

test_that("right join contains all keys from y", {
  # unmatched y keys appear with `NA` on the x side, appended after matches
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "right")
  expect_equal(out$x, c(2L, NA, NA))
  expect_equal(out$y, c(3L, 1L, 2L))

  out <- join_rows(c(2, 1), c(3, 4, 1), type = "right", condition = ">=")
  expect_equal(out$x, c(1L, 2L, NA, NA))
  expect_equal(out$y, c(3L, 3L, 1L, 2L))
})

test_that("full join contains all keys from both", {
  out <- join_rows(c(2, 1), c(3, 1), type = "full")
  expect_equal(out$x, c(1L, 2L, NA))
  expect_equal(out$y, c(NA, 2L, 1L))

  out <- join_rows(c(2, 1), c(3, 1), type = "full", condition = ">")
  expect_equal(out$x, c(1L, 2L, NA))
  expect_equal(out$y, c(2L, NA, 1L))
})

test_that("nest join returns 0L for unmatched x keys", {
  # nest joins use `0L` (not `NA`) as the "no match" sentinel on the y side
  out <- join_rows(c(2, 1), c(3, 4, 1), type = "nest")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(0L, 3L))
})

test_that("nest join returns 0L for missing x keys with `na_matches = 'never'`", {
  out <- join_rows(c(NA, 1), 1, type = "nest", na_matches = "never")
  expect_equal(out$x, c(1L, 2L))
  expect_equal(out$y, c(0L, 1L))
})

test_that("matching rows can be filtered", {
  # filter = "max" keeps the largest matching y key, "min" the smallest
  out <- join_rows(c(3, 5), c(2, 4, 1), condition = ">=", filter = "max")
  expect_equal(out$x, 1:2)
  expect_equal(out$y, 1:2)

  out <- join_rows(c(3, 5), c(2, 4, 1), condition = ">=", filter = "min")
  expect_equal(out$x, 1:2)
  expect_equal(out$y, c(3, 3))
})

test_that("missing values only match with `==`, `>=`, and `<=` conditions", {
  out <- join_rows(NA, NA, condition = "==")
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)

  out <- join_rows(NA, NA, condition = ">=")
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)

  out <- join_rows(NA, NA, condition = "<=")
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)

  # strict inequalities never match NA against NA
  out <- join_rows(NA, NA, condition = ">")
  expect_identical(out$x, integer())
  expect_identical(out$y, integer())

  out <- join_rows(NA, NA, condition = "<")
  expect_identical(out$x, integer())
  expect_identical(out$y, integer())

  # mixed per-column conditions on two-column keys
  x <- tibble(x = c(1, 1), y = c(2, NA))
  y <- tibble(x = c(1, 1), y = c(3, NA))

  out <- join_rows(x, y, condition = c("==", "<="))
  expect_identical(out$x, c(1L, 2L))
  expect_identical(out$y, c(1L, 2L))

  out <- join_rows(x, y, condition = c("==", "<"))
  expect_identical(out$x, 1L)
  expect_identical(out$y, 1L)
})

# NOTE(review): the test below is truncated here — its body continues past the
# end of this chunk.
test_that("join_rows() doesn't error on unmatched rows if they won't be dropped", {
  # 2 is unmatched, but a left join means we always retain that key
  out <- join_rows(c(1, 2), 1, type
= "left", unmatched = "error") expect_identical(out$x, c(1L, 2L)) expect_identical(out$y, c(1L, NA)) out <- join_rows(c(1, 2), c(1, 3), type = "full", unmatched = "error") expect_identical(out$x, c(1L, 2L, NA)) expect_identical(out$y, c(1L, NA, 2L)) }) test_that("join_rows() allows `unmatched` to be specified independently for inner joins", { out <- join_rows(c(1, 2), 1, type = "inner", unmatched = c("drop", "error")) expect_identical(out$x, 1L) expect_identical(out$y, 1L) out <- join_rows(1, c(2, 1), type = "inner", unmatched = c("error", "drop")) expect_identical(out$x, 1L) expect_identical(out$y, 2L) # Both have dropped rows, only `y` is mentioned in the error expect_snapshot(error = TRUE, { join_rows(c(1, 3), c(1, 2), type = "inner", unmatched = c("drop", "error")) }) }) test_that("join_rows() expects incompatible type errors to have been handled by join_cast_common()", { expect_snapshot({ (expect_error( join_rows(data.frame(x = 1), data.frame(x = factor("a"))) )) }) }) test_that("join_rows() gives meaningful one-to-one errors", { expect_snapshot(error = TRUE, { join_rows(1, c(1, 1), relationship = "one-to-one") }) expect_snapshot(error = TRUE, { join_rows(c(1, 1), 1, relationship = "one-to-one") }) }) test_that("join_rows() gives meaningful one-to-many errors", { expect_snapshot(error = TRUE, { join_rows(c(1, 1), 1, relationship = "one-to-many") }) }) test_that("join_rows() gives meaningful many-to-one errors", { expect_snapshot(error = TRUE, { join_rows(1, c(1, 1), relationship = "many-to-one") }) }) test_that("join_rows() gives meaningful many-to-many warnings", { expect_snapshot({ join_rows(c(1, 1), c(1, 1)) }) # With proof that the defaults flow through user facing functions df <- data.frame(x = c(1, 1)) expect_snapshot({ left_join(df, df, by = join_by(x)) }) }) test_that("join_rows() gives meaningful error message on unmatched rows", { # Unmatched in the RHS expect_snapshot(error = TRUE, join_rows( data.frame(x = c(1, 2)), data.frame(x = c(3, 1)), type = 
"left", unmatched = "error" ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = c(1, 2)), data.frame(x = c(3, 1)), type = "nest", unmatched = "error" ) ) # Unmatched in the LHS expect_snapshot(error = TRUE, join_rows( data.frame(x = c(1, 2)), data.frame(x = c(3, 1)), type = "right", unmatched = "error" ) ) # Unmatched in either side expect_snapshot(error = TRUE, join_rows( data.frame(x = c(1, 2)), data.frame(x = 1), type = "inner", unmatched = "error" ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = c(1, 2)), data.frame(x = 1), type = "inner", unmatched = c("error", "drop") ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = 1), data.frame(x = c(1, 2)), type = "inner", unmatched = "error" ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = 1), data.frame(x = c(1, 2)), type = "inner", unmatched = c("drop", "error") ) ) }) test_that("join_rows() always errors on unmatched missing values", { # Unmatched in the RHS expect_snapshot(error = TRUE, join_rows( data.frame(x = 1), data.frame(x = NA), type = "left", unmatched = "error", na_matches = "na" ) ) expect_snapshot( error = TRUE, join_rows( data.frame(x = NA), data.frame(x = NA), type = "left", unmatched = "error", na_matches = "never" ) ) expect_snapshot( error = TRUE, join_rows( data.frame(x = 1), data.frame(x = NA), type = "nest", unmatched = "error", na_matches = "na" ) ) expect_snapshot( error = TRUE, join_rows( data.frame(x = NA), data.frame(x = NA), type = "nest", unmatched = "error", na_matches = "never" ) ) # Unmatched in the LHS expect_snapshot(error = TRUE, join_rows( data.frame(x = NA), data.frame(x = 1), type = "right", unmatched = "error", na_matches = "na" ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = NA), data.frame(x = NA), type = "right", unmatched = "error", na_matches = "never" ) ) # Unmatched in either side expect_snapshot(error = TRUE, join_rows( data.frame(x = 1), data.frame(x = c(1, NA)), type = "inner", unmatched = "error", na_matches = "na" ) ) 
expect_snapshot(error = TRUE, join_rows( data.frame(x = 1), data.frame(x = c(1, NA)), type = "inner", unmatched = c("drop", "error"), na_matches = "na" ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = c(1, NA)), data.frame(x = 1), type = "inner", unmatched = "error", na_matches = "na" ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = c(1, NA)), data.frame(x = 1), type = "inner", unmatched = c("error", "drop"), na_matches = "na" ) ) expect_snapshot(error = TRUE, join_rows( data.frame(x = NA), data.frame(x = NA), type = "inner", unmatched = "error", na_matches = "never" ) ) }) test_that("join_rows() validates `unmatched`", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { join_rows(df, df, unmatched = 1) join_rows(df, df, unmatched = "foo") # One `unmatched` input is allowed for most joins join_rows(df, df, type = "left", unmatched = character()) join_rows(df, df, type = "left", unmatched = c("drop", "error")) # Two `unmatched` inputs are allowed for inner joins join_rows(df, df, type = "inner", unmatched = character()) join_rows(df, df, type = "inner", unmatched = c("drop", "error", "error")) join_rows(df, df, type = "inner", unmatched = c("drop", "dr")) }) }) test_that("join_rows() validates `relationship`", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { join_rows(df, df, relationship = 1) }) # Notably can't use the vctrs options expect_snapshot(error = TRUE, { join_rows(df, df, relationship = "none") }) expect_snapshot(error = TRUE, { join_rows(df, df, relationship = "warn-many-to-many") }) }) test_that("join_rows() rethrows overflow error nicely (#6912)", { skip_on_cran() # Windows 32-bit doesn't support long vectors of this size, and the # intermediate `r_ssize` will be too large skip_if(.Machine$sizeof.pointer < 8L, message = "No long vector support") df <- tibble(x = 1:1e7) expect_snapshot(error = TRUE, { join_rows(df, df, condition = ">=") }) }) # Deprecated behavior ---------------------------------------------------------- 
test_that("`multiple = NULL` is deprecated and results in `'all'` (#6731)", { df1 <- tibble(x = c(1, 2)) df2 <- tibble(x = c(2, 1, 2)) expect_snapshot({ out <- join_rows(df1, df2, multiple = NULL) }) expect_identical(out$x, c(1L, 2L, 2L)) expect_identical(out$y, c(2L, 1L, 3L)) expect_snapshot({ left_join(df1, df2, by = join_by(x), multiple = NULL) }) }) test_that("`multiple = 'error'` is deprecated (#6731)", { df1 <- tibble(x = c(1, 2)) df2 <- tibble(x = c(2, 1, 2)) expect_snapshot(error = TRUE, { join_rows(df1, df2, multiple = "error") }) expect_snapshot(error = TRUE, { left_join(df1, df2, by = join_by(x), multiple = "error") }) }) test_that("`multiple = 'warning'` is deprecated (#6731)", { df1 <- tibble(x = c(1, 2)) df2 <- tibble(x = c(2, 1, 2)) expect_snapshot({ out <- join_rows(df1, df2, multiple = "warning") }) expect_identical(out$x, c(1L, 2L, 2L)) expect_identical(out$y, c(2L, 1L, 3L)) expect_snapshot({ left_join(df1, df2, by = join_by(x), multiple = "warning") }) }) dplyr/tests/testthat/test-defunct.R0000644000176200001440000000045714366556340017160 0ustar liggesuserstest_that("generate informative errors", { expect_snapshot(error = TRUE, { id() failwith() select_vars() rename_vars() select_var() current_vars() bench_tbls() compare_tbls() compare_tbls2() eval_tbls() eval_tbls2() location() changes() }) }) dplyr/tests/testthat/helper-pick.R0000644000176200001440000000013714366556340016751 0ustar liggesuserspick_wrapper <- function(...) { # Wrapping `pick()` forces evaluation fallback pick(...) 
} dplyr/tests/testthat/test-group-nest.R0000644000176200001440000000352614366556340017633 0ustar liggesuserstest_that("group_nest() works", { grouped <- group_by(starwars, species, homeworld) gdata <- group_data(grouped) res <- group_nest(starwars, species, homeworld) expect_type(pull(res), "list") expect_equal(attr(pull(res), "ptype"), vec_slice(select(starwars, -species, -homeworld), 0L)) expect_equal(res[1:2], structure(gdata[1:2], .drop = NULL)) nested <- bind_rows(!!!res$data) expect_equal(names(nested), setdiff(names(starwars), c("species", "homeworld"))) }) test_that("group_nest() can keep the grouping variables", { grouped <- group_by(starwars, species, homeworld) gdata <- group_data(grouped) res <- group_nest(starwars, species, homeworld, keep = TRUE) nested <- bind_rows(!!!res$data) expect_equal(names(nested), names(starwars)) }) test_that("group_nest() works on grouped data frames", { grouped <- group_by(starwars, species, homeworld) gdata <- group_data(grouped) res <- group_nest(grouped) expect_type(pull(res), "list") expect_equal(res[1:2], structure(gdata[1:2], .drop = NULL)) expect_equal(names(bind_rows(!!!res$data)), setdiff(names(starwars), c("species", "homeworld"))) res <- group_nest(grouped, keep = TRUE) expect_type(pull(res), "list") expect_equal(attr(pull(res), "ptype"), vec_slice(starwars, 0L)) expect_equal(res[1:2], structure(gdata[1:2], .drop = NULL)) expect_equal(names(bind_rows(!!!res$data)), names(starwars)) }) test_that("group_nest.grouped_df() warns about ...", { expect_warning(group_nest(group_by(mtcars, cyl), cyl)) }) test_that("group_nest() works if no grouping column", { res <- group_nest(iris) expect_equal(res$data, list(iris)) expect_equal(names(res), "data") }) test_that("group_nest() respects .drop", { nested <- tibble(f = factor("b", levels = c("a", "b", "c")), x = 1, y = 2) %>% group_nest(f, .drop = TRUE) expect_equal(nrow(nested), 1L) }) dplyr/tests/testthat/utf-8.txt0000644000176200001440000000157213663216626016132 0ustar 
liggesusers# UTF-8 tests that can't be run on Windows CRAN # R CMD check will try to parse the file anyway, # we use a different file extension to avoid this. df <- data.frame(中文1 = 1:10, 中文2 = 1:10, eng = 1:10) df2 <- df %>% mutate(中文1 = 中文1 + 1) gdf2 <- df %>% group_by(eng) %>% mutate(中文1 = 中文1 + 1) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(df2))) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(gdf2))) df3 <- filter(df2, eng > 5) gdf3 <- filter(gdf2, eng > 5) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(df3))) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(gdf3))) df4 <- filter(df2, 中文1 > 5) gdf4 <- filter(gdf2, 中文1 > 5) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(df4))) expect_equal(lobstr::obj_addrs(names(df)), lobstr::obj_addrs(names(gdf4))) dplyr/tests/testthat/test-near.R0000644000176200001440000000012214266276767016455 0ustar liggesuserstest_that("near accepts nearby fp values", { expect_true(near(sqrt(2)^2, 2)) }) dplyr/tests/testthat/test-count-tally.R0000644000176200001440000001402014472225345017766 0ustar liggesusers# count ------------------------------------------------------------------- test_that("count sorts output by keys by default", { # Due to usage of `summarise()` internally df <- tibble(x = c(2, 1, 1, 2, 1)) out <- count(df, x) expect_equal(out, tibble(x = c(1, 2), n = c(3, 2))) }) test_that("count can sort output by `n`", { df <- tibble(x = c(1, 1, 2, 2, 2)) out <- count(df, x, sort = TRUE) expect_equal(out, tibble(x = c(2, 1), n = c(3, 2))) }) test_that("count can rename grouping columns", { # But should it really allow this? 
df <- tibble(x = c(2, 1, 1, 2, 1)) out <- count(df, y = x) expect_equal(out, tibble(y = c(1, 2), n = c(3, 2))) }) test_that("informs if n column already present, unless overridden", { df1 <- tibble(n = c(1, 1, 2, 2, 2)) expect_message(out <- count(df1, n), "already present") expect_named(out, c("n", "nn")) # not a good idea, but supported expect_message(out <- count(df1, n, name = "n"), NA) expect_named(out, "n") expect_message(out <- count(df1, n, name = "nn"), NA) expect_named(out, c("n", "nn")) df2 <- tibble(n = c(1, 1, 2, 2, 2), nn = 1:5) expect_message(out <- count(df2, n), "already present") expect_named(out, c("n", "nn")) expect_message(out <- count(df2, n, nn), "already present") expect_named(out, c("n", "nn", "nnn")) }) test_that("name must be string", { df <- tibble(x = c(1, 2)) expect_snapshot(error = TRUE, count(df, x, name = 1)) expect_snapshot(error = TRUE, count(df, x, name = letters)) }) test_that("output includes empty levels with .drop = FALSE", { df <- tibble(f = factor("b", levels = c("a", "b", "c"))) out <- count(df, f, .drop = FALSE) expect_equal(out$n, c(0, 1, 0)) out <- count(group_by(df, f, .drop = FALSE)) expect_equal(out$n, c(0, 1, 0)) }) test_that("count preserves grouping", { df <- tibble(g = c(1, 2, 2, 2)) exp <- tibble(g = c(1, 2), n = c(1, 3)) expect_equal(df %>% count(g), exp) expect_equal(df %>% group_by(g) %>% count(), exp %>% group_by(g)) }) test_that("output preserves class & attributes where possible", { df <- data.frame(g = c(1, 2, 2, 2)) attr(df, "my_attr") <- 1 out <- df %>% count(g) expect_s3_class(out, "data.frame", exact = TRUE) expect_equal(attr(out, "my_attr"), 1) out <- df %>% group_by(g) %>% count() expect_s3_class(out, "grouped_df") expect_equal(group_vars(out), "g") # summarise() currently drops attributes expect_null(attr(out, "my_attr")) }) test_that("works with dbplyr", { skip_if_not_installed("dbplyr") skip_if_not_installed("RSQLite") db <- dbplyr::memdb_frame(x = c(1, 1, 1, 2, 2)) df1 <- db %>% count(x) %>% 
as_tibble() expect_equal(df1, tibble(x = c(1, 2), n = c(3, 2))) df2 <- db %>% add_count(x) %>% as_tibble() expect_equal(df2, tibble(x = c(1, 1, 1, 2, 2), n = c(3, 3, 3, 2, 2))) }) test_that("dbplyr `count()` method has transient internal grouping (#6338, tidyverse/dbplyr#940)", { skip_if_not_installed("dbplyr") skip_if_not_installed("RSQLite") db <- dbplyr::memdb_frame( x = c(1, 1, 1, 2, 2), y = c("a", "a", "b", "c", "c") ) df <- db %>% count(x, y) %>% collect() expect <- tibble( x = c(1, 1, 2), y = c("a", "b", "c"), n = c(2L, 1L, 2L) ) expect_false(is_grouped_df(df)) expect_identical(df, expect) }) test_that("can only explicitly chain together multiple tallies", { expect_snapshot({ df <- data.frame(g = c(1, 1, 2, 2), n = 1:4) df %>% count(g, wt = n) df %>% count(g, wt = n) %>% count(wt = n) df %>% count(n) }) }) test_that("wt = n() is deprecated", { df <- data.frame(x = 1:3) expect_warning(count(df, wt = n()), "`wt = n()`", fixed = TRUE) }) test_that("count() owns errors (#6139)", { expect_snapshot({ (expect_error(count(mtcars, new = 1 + ""))) (expect_error(count(mtcars, wt = 1 + ""))) }) }) # tally ------------------------------------------------------------------- test_that("tally can sort output", { gf <- group_by(tibble(x = c(1, 1, 2, 2, 2)), x) out <- tally(gf, sort = TRUE) expect_equal(out, tibble(x = c(2, 1), n = c(3, 2))) }) test_that("weighted tally drops NAs (#1145)", { df <- tibble(x = c(1, 1, NA)) expect_equal(tally(df, x)$n, 2) }) test_that("tally() drops last group (#5199) ", { df <- data.frame(x = 1, y = 2, z = 3) res <- expect_message(df %>% group_by(x, y) %>% tally(wt = z), NA) expect_equal(group_vars(res), "x") }) test_that("tally() owns errors (#6139)", { expect_snapshot({ (expect_error(tally(mtcars, wt = 1 + ""))) }) }) # add_count --------------------------------------------------------------- test_that("add_count preserves grouping", { df <- tibble(g = c(1, 2, 2, 2)) exp <- tibble(g = c(1, 2, 2, 2), n = c(1, 3, 3, 3)) expect_equal(df %>% 
add_count(g), exp) expect_equal(df %>% group_by(g) %>% add_count(), exp %>% group_by(g)) }) test_that(".drop is deprecated", { local_options(lifecycle_verbosity = "warning") df <- tibble(f = factor("b", levels = c("a", "b", "c"))) expect_warning(out <- add_count(df, f, .drop = FALSE), "deprecated") }) test_that("add_count() owns errors (#6139)", { expect_snapshot({ (expect_error(add_count(mtcars, new = 1 + ""))) (expect_error(add_count(mtcars, wt = 1 + ""))) }) }) # add_tally --------------------------------------------------------------- test_that("can add tallies of a variable", { df <- tibble(a = c(2, 1, 1)) expect_equal( df %>% group_by(a) %>% add_tally(), group_by(tibble(a = c(2, 1, 1), n = c(1, 2, 2)), a) ) }) test_that("add_tally can be given a weighting variable", { df <- data.frame(a = c(1, 1, 2, 2, 2), w = c(1, 1, 2, 3, 4)) out <- df %>% group_by(a) %>% add_tally(wt = w) expect_equal(out$n, c(2, 2, 9, 9, 9)) out <- df %>% group_by(a) %>% add_tally(wt = w + 1) expect_equal(out$n, c(4, 4, 12, 12, 12)) }) test_that("can override output column", { df <- data.frame(g = c(1, 1, 2, 2, 2), x = c(3, 2, 5, 5, 5)) expect_named(add_tally(df, name = "xxx"), c("g", "x", "xxx")) }) test_that("add_tally() owns errors (#6139)", { expect_snapshot({ (expect_error(add_tally(mtcars, wt = 1 + ""))) }) }) dplyr/tests/testthat/test-rowwise.R0000644000176200001440000000706114366556340017225 0ustar liggesuserstest_that("rowwise status preserved by major verbs", { rf <- rowwise(tibble(x = 1:5, y = 5:1), "x") out <- arrange(rf, y) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- filter(rf, x < 3) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- mutate(rf, x = x + 1) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- rename(rf, X = x) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "X") out <- select(rf, "x") expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out <- 
slice(rf, c(1, 1)) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") # Except for summarise out <- summarise(rf, z = mean(x, y)) expect_s3_class(out, "grouped_df") expect_equal(group_vars(out), "x") }) test_that("rowwise nature preserved by subsetting ops", { rf <- rowwise(tibble(x = 1:5, y = 1:5), "x") out <- rf[1] expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") out[, "z"] <- 5:1 expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "x") names(out) <- toupper(names(out)) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "X") }) test_that("except when it should be removed", { rf <- rowwise(tibble(x = 1:5, y = 1:5), "x") expect_equal(out <- rf[, 1, drop = TRUE], rf$x) }) test_that("rowwise has decent print method", { rf <- rowwise(tibble(x = 1:5), "x") expect_snapshot(rf) }) test_that("rowwise captures group_vars", { df <- group_by(tibble(g = 1:2, x = 1:2), g) rw <- rowwise(df) expect_equal(group_vars(rw), "g") # but can't regroup expect_error(rowwise(df, x), "Can't re-group") }) test_that("can re-rowwise", { rf1 <- rowwise(tibble(x = 1:5, y = 1:5), "x") rf2 <- rowwise(rf1, y) expect_equal(group_vars(rf2), "y") }) test_that("new_rowwise_df() does not require `group_data=`", { df <- new_rowwise_df(data.frame(x = 1:2)) expect_s3_class(df, "rowwise_df") expect_equal(attr(df, "groups"), tibble(".rows" := vctrs::list_of(1L, 2L))) }) test_that("new_rowwise_df() can add class and attributes (#5918)", { df <- new_rowwise_df(tibble(x = 1:4), tibble(), class = "custom_rowwise_df", a = "b") expect_s3_class(df, "custom_rowwise_df") expect_equal(attr(df, "a"), "b") }) test_that("validate_rowwise_df() gives useful errors", { df1 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g) groups <- attr(df1, "groups") groups[[2]] <- 4:1 attr(df1, "groups") <- groups df2 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g) groups <- attr(df2, "groups") names(groups) <- c("g", "not.rows") attr(df2, "groups") <- groups 
df3 <- df2 attr(df3, "groups") <- tibble() df4 <- df3 attr(df4, "groups") <- NA df7 <- rowwise(tibble(x = 1:10)) attr(df7, "groups")$.rows <- 11:20 df8 <- rowwise(tibble(x = 1:10)) df10 <- df7 attr(df10, "groups") <- tibble() df11 <- df7 attr(df11, "groups") <- NULL expect_snapshot({ (expect_error(validate_rowwise_df(df1))) (expect_error(validate_rowwise_df(df2))) (expect_error(validate_rowwise_df(df3))) (expect_error(validate_rowwise_df(df4))) (expect_error(validate_rowwise_df(df7))) (expect_error(attr(df8, "groups")$.rows <- 1:8)) (expect_error(validate_rowwise_df(df10))) (expect_error(validate_rowwise_df(df11))) (expect_error( new_rowwise_df( tibble(x = 1:10), tibble(".rows" := list(1:5, -1L)) ) )) (expect_error( new_rowwise_df( tibble(x = 1:10), 1:10 ) )) }) }) dplyr/tests/testthat/test-arrange.R0000644000176200001440000003376514406402754017151 0ustar liggesusers# To turn on warnings from tibble::`names<-()` local_options(lifecycle_verbosity = "warning") test_that("empty arrange() returns input", { df <- tibble(x = 1:10, y = 1:10) gf <- group_by(df, x) expect_identical(arrange(df), df) expect_identical(arrange(gf), gf) expect_identical(arrange(df, !!!list()), df) expect_identical(arrange(gf, !!!list()), gf) }) test_that("can sort empty data frame", { df <- tibble(a = numeric(0)) expect_equal(arrange(df, a), df) }) test_that("local arrange sorts missing values to end", { df <- data.frame(x = c(2, 1, NA)) expect_equal(df %>% arrange(x) %>% pull(), c(1, 2, NA)) expect_equal(df %>% arrange(desc(x)) %>% pull(), c(2, 1, NA)) }) test_that("arrange() gives meaningful errors", { expect_snapshot({ # duplicated column name (expect_error( tibble(x = 1, x = 1, .name_repair = "minimal") %>% arrange(x) )) # error in mutate() step (expect_error( tibble(x = 1) %>% arrange(y) )) (expect_error( tibble(x = 1) %>% arrange(rep(x, 2)) )) }) }) # column types ---------------------------------------------------------- test_that("arrange handles list columns (#282)", { # no intrinsic 
ordering df <- tibble(x = 1:3, y = list(3, 2, 1)) expect_equal(arrange(df, y), df) df <- tibble(x = 1:3, y = list(sum, mean, sd)) expect_equal(arrange(df, y), df) }) test_that("arrange handles raw columns (#1803)", { df <- tibble(x = 1:3, y = as.raw(3:1)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange handles matrix columns", { df <- tibble(x = 1:3, y = matrix(6:1, ncol = 2)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange handles data.frame columns (#3153)", { df <- tibble(x = 1:3, y = data.frame(z = 3:1)) expect_equal(arrange(df, y), tibble(x = 3:1, y = data.frame(z = 1:3))) }) test_that("arrange handles complex columns", { df <- tibble(x = 1:3, y = 3:1 + 2i) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange handles S4 classes (#1105)", { TestS4 <- suppressWarnings(setClass("TestS4", contains = "integer")) setMethod('[', 'TestS4', function(x, i, ...){ TestS4(unclass(x)[i, ...]) }) on.exit(removeClass("TestS4"), add = TRUE) df <- tibble(x = 1:3, y = TestS4(3:1)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("arrange works with two columns when the first has a data frame proxy (#6268)", { # `id1` has a data frame proxy for `vec_proxy_order()` df <- tibble( id1 = new_rcrd(list(x = 1, y = 1)), id2 = c(1, 3, 2) ) out <- arrange(df, id1, id2) expect_identical(out$id2, c(1, 2, 3)) }) test_that("arrange ignores NULLs (#6193)", { df <- tibble(x = 1:2) y <- NULL out <- arrange(df, y, desc(x)) expect_equal(out$x, 2:1) out <- arrange(df, y, desc(x), y) expect_equal(out$x, 2:1) }) test_that("`arrange()` works with `numeric_version` (#6680)", { x <- numeric_version(c("1.11", "1.2.3", "1.2.2")) df <- tibble(x = x) expect <- df[c(3, 2, 1),] expect_identical(arrange(df, x), expect) }) # locale -------------------------------------------------------------- test_that("arrange defaults to the C locale", { x <- c("A", "a", "b", "B") df <- tibble(x = x) res <- arrange(df, x) expect_identical(res$x, c("A", "B", "a", "b")) res <- 
arrange(df, desc(x)) expect_identical(res$x, rev(c("A", "B", "a", "b"))) }) test_that("locale can be set to an English locale", { skip_if_not_installed("stringi", "1.5.3") x <- c("A", "a", "b", "B") df <- tibble(x = x) res <- arrange(df, x, .locale = "en") expect_identical(res$x, c("a", "A", "b", "B")) }) test_that("non-English locales can be used", { skip_if_not_installed("stringi", "1.5.3") # Danish `o` with `/` through it sorts after `z` in Danish locale x <- c("o", "\u00F8", "p", "z") df <- tibble(x = x) # American English locale puts it right after `o` res <- arrange(df, x, .locale = "en") expect_identical(res$x, x) res <- arrange(df, x, .locale = "da") expect_identical(res$x, x[c(1, 3, 4, 2)]) }) test_that("arrange errors if stringi is not installed and a locale identifier is used", { expect_snapshot(error = TRUE, { locale_to_chr_proxy_collate("fr", has_stringi = FALSE) }) }) test_that("arrange validates `.locale`", { df <- tibble() expect_snapshot(error = TRUE, { arrange(df, .locale = 1) }) expect_snapshot(error = TRUE, { arrange(df, .locale = c("en_US", "fr_BF")) }) }) test_that("arrange validates that `.locale` must be one from stringi", { skip_if_not_installed("stringi", "1.5.3") df <- tibble() expect_snapshot(error = TRUE, { arrange(df, .locale = "x") }) }) # data ---------------------------------------------------------------- test_that("arrange preserves input class", { df1 <- data.frame(x = 1:3, y = 3:1) df2 <- tibble(x = 1:3, y = 3:1) df3 <- df1 %>% group_by(x) expect_s3_class(arrange(df1, x), "data.frame", exact = TRUE) expect_s3_class(arrange(df2, x), "tbl_df") expect_s3_class(arrange(df3, x), "grouped_df") }) test_that("grouped arrange ignores group, unless requested with .by_group", { df <- data.frame(g = c(2, 1, 2, 1), x = 4:1) gf <- group_by(df, g) expect_equal(arrange(gf, x), gf[4:1, ,]) expect_equal(arrange(gf, x, .by_group = TRUE), gf[c(4, 2, 3, 1), ,]) }) test_that("arrange updates the grouping structure (#605)", { df <- tibble(g = c(2, 2, 
1, 1), x = c(1, 3, 2, 4)) res <- df %>% group_by(g) %>% arrange(x) expect_s3_class(res, "grouped_df") expect_equal(group_rows(res), list_of(c(2L, 4L), c(1L, 3L))) }) test_that("arrange() supports across() and pick() (#4679)", { df <- tibble(x = c(1, 3, 2, 1), y = c(4, 3, 2, 1)) expect_identical( df %>% arrange(pick(everything())), df %>% arrange(x, y) ) expect_identical( df %>% arrange(across(everything(), .fns = desc)), df %>% arrange(desc(x), desc(y)) ) expect_identical( df %>% arrange(pick(x)), df %>% arrange(x) ) expect_identical( df %>% arrange(across(y, .fns = identity)), df %>% arrange(y) ) }) test_that("arrange() works with across() and pick() cols that return multiple columns (#6490)", { df <- tibble( a = c(1, 1, 1), b = c(2, 2, 2), c = c(4, 4, 3), d = c(5, 2, 7) ) expect_identical( arrange(df, across(c(a, b), .fns = identity), across(c(c, d), .fns = identity)), df[c(3, 2, 1),] ) expect_identical( arrange(df, pick(a, b), pick(c, d)), df[c(3, 2, 1),] ) }) test_that("arrange() evaluates each pick() call on the original data (#6495)", { df <- tibble(x = 2:1) out <- arrange(df, TRUE, pick(everything())) expect_identical(out, df[c(2, 1),]) out <- arrange(df, NULL, pick(everything())) expect_identical(out, df[c(2, 1),]) }) test_that("arrange() with empty dots still calls dplyr_row_slice()", { tbl <- new_tibble(list(x = 1), nrow = 1L) foo <- structure(tbl, class = c("foo_df", class(tbl))) local_methods( # `foo_df` always loses class when row slicing dplyr_row_slice.foo_df = function(data, i, ...) { out <- NextMethod() new_tibble(out, nrow = nrow(out)) } ) expect_s3_class(arrange(foo), class(tbl), exact = TRUE) expect_s3_class(arrange(foo, x), class(tbl), exact = TRUE) }) test_that("can arrange() with unruly class", { local_methods( `[.dplyr_foobar` = function(x, i, ...) new_dispatched_quux(vec_slice(x, i)), dplyr_row_slice.dplyr_foobar = function(x, i, ...) 
x[i, ] ) df <- foobar(data.frame(x = 1:3)) expect_identical( arrange(df, desc(x)), quux(data.frame(x = 3:1, dispatched = TRUE)) ) }) test_that("arrange() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) stack <<- sys.calls(), arrange(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("desc() inside arrange() checks the number of arguments (#5921)", { expect_snapshot({ df <- data.frame(x = 1, y = 2) (expect_error(arrange(df, desc(x, y)))) }) }) test_that("arrange keeps zero length groups",{ df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) expect_equal( group_size(arrange(df)), c(2, 2, 0) ) expect_equal( group_size(arrange(df, x)), c(2, 2, 0) ) }) # legacy -------------------------------------------------------------- test_that("legacy - using the global option `dplyr.legacy_locale` forces the system locale", { skip_if_not(has_collate_locale("en_US"), message = "Can't use 'en_US' locale") local_options(dplyr.legacy_locale = TRUE) withr::local_collate("en_US") df <- tibble(x = c("a", "A", "Z", "b")) expect_identical(arrange(df, x)$x, c("a", "A", "b", "Z")) }) test_that("legacy - usage of `.locale` overrides `dplyr.legacy_locale`", { skip_if_not_installed("stringi", "1.5.3") local_options(dplyr.legacy_locale = TRUE) # Danish `o` with `/` through it sorts after `z` in Danish locale x <- c("o", "\u00F8", "p", "z") df <- tibble(x = x) # American English locale puts it right after `o` res <- arrange(df, x, .locale = "en") expect_identical(res$x, x) res <- arrange(df, x, .locale = "da") expect_identical(res$x, x[c(1, 3, 4, 2)]) }) test_that("legacy - empty arrange() returns input", { local_options(dplyr.legacy_locale = TRUE) df <- tibble(x = 1:10, y = 1:10) gf <- group_by(df, x) expect_identical(arrange(df), df) expect_identical(arrange(gf), gf) 
expect_identical(arrange(df, !!!list()), df) expect_identical(arrange(gf, !!!list()), gf) }) test_that("legacy - can sort empty data frame", { local_options(dplyr.legacy_locale = TRUE) df <- tibble(a = numeric(0)) expect_equal(arrange(df, a), df) }) test_that("legacy - local arrange sorts missing values to end", { local_options(dplyr.legacy_locale = TRUE) df <- data.frame(x = c(2, 1, NA)) expect_equal(df %>% arrange(x) %>% pull(), c(1, 2, NA)) expect_equal(df %>% arrange(desc(x)) %>% pull(), c(2, 1, NA)) }) test_that("legacy - arrange handles list columns (#282)", { local_options(dplyr.legacy_locale = TRUE) # no intrinsic ordering df <- tibble(x = 1:3, y = list(3, 2, 1)) expect_equal(arrange(df, y), df) df <- tibble(x = 1:3, y = list(sum, mean, sd)) expect_equal(arrange(df, y), df) }) test_that("legacy - arrange handles raw columns (#1803)", { local_options(dplyr.legacy_locale = TRUE) df <- tibble(x = 1:3, y = as.raw(3:1)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("legacy - arrange handles matrix columns", { local_options(dplyr.legacy_locale = TRUE) df <- tibble(x = 1:3, y = matrix(6:1, ncol = 2)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("legacy - arrange handles data.frame columns (#3153)", { local_options(dplyr.legacy_locale = TRUE) df <- tibble(x = 1:3, y = data.frame(z = 3:1)) expect_equal(arrange(df, y), tibble(x = 3:1, y = data.frame(z = 1:3))) }) test_that("legacy - arrange handles complex columns", { local_options(dplyr.legacy_locale = TRUE) df <- tibble(x = 1:3, y = 3:1 + 2i) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("legacy - arrange handles S4 classes (#1105)", { local_options(dplyr.legacy_locale = TRUE) TestS4 <- suppressWarnings(setClass("TestS4", contains = "integer")) setMethod('[', 'TestS4', function(x, i, ...){ TestS4(unclass(x)[i, ...]) }) on.exit(removeClass("TestS4"), add = TRUE) df <- tibble(x = 1:3, y = TestS4(3:1)) expect_equal(arrange(df, y), df[3:1, ]) }) test_that("legacy - `arrange()` works with 
`numeric_version` (#6680)", { local_options(dplyr.legacy_locale = TRUE) x <- numeric_version(c("1.11", "1.2.3", "1.2.2")) df <- tibble(x = x) expect <- df[c(3, 2, 1),] expect_identical(arrange(df, x), expect) }) test_that("legacy - arrange works with two columns when the first has a data frame proxy (#6268)", { local_options(dplyr.legacy_locale = TRUE) # `id1` has a data frame proxy for `vec_proxy_order()` df <- tibble( id1 = new_rcrd(list(x = 1, y = 1)), id2 = c(1, 3, 2) ) out <- arrange(df, id1, id2) expect_identical(out$id2, c(1, 2, 3)) }) test_that("legacy - arrange() supports across() and pick() (#4679)", { local_options(dplyr.legacy_locale = TRUE) df <- tibble(x = c(1, 3, 2, 1), y = c(4, 3, 2, 1)) expect_identical( df %>% arrange(pick(everything())), df %>% arrange(x, y) ) expect_identical( df %>% arrange(across(everything(), .fns = desc)), df %>% arrange(desc(x), desc(y)) ) expect_identical( df %>% arrange(pick(x)), df %>% arrange(x) ) expect_identical( df %>% arrange(across(y, .fns = identity)), df %>% arrange(y) ) }) test_that("legacy - arrange() works with across() and pick() cols that return multiple columns (#6490)", { local_options(dplyr.legacy_locale = TRUE) df <- tibble( a = c(1, 1, 1), b = c(2, 2, 2), c = c(4, 4, 3), d = c(5, 2, 7) ) expect_identical( arrange(df, across(c(a, b), .fns = identity), across(c(c, d), .fns = identity)), df[c(3, 2, 1),] ) expect_identical( arrange(df, pick(a, b), pick(c, d)), df[c(3, 2, 1),] ) }) test_that("legacy - arrange sorts missings in df-cols correctly", { local_options(dplyr.legacy_locale = TRUE) col <- tibble(a = c(1, 1, 1), b = c(3, NA, 1)) df <- tibble(x = col) expect_identical(arrange(df, x), df[c(3, 1, 2),]) expect_identical(arrange(df, desc(x)), df[c(1, 3, 2),]) }) test_that("legacy - arrange with duplicates in a df-col uses a stable sort", { local_options(dplyr.legacy_locale = TRUE) col <- tibble(a = c(1, 1, 1, 1, 1), b = c(3, NA, 2, 3, NA)) df <- tibble(x = col, y = 1:5) expect_identical(arrange(df, x)$y, 
c(3L, 1L, 4L, 2L, 5L)) expect_identical(arrange(df, desc(x))$y, c(1L, 4L, 3L, 2L, 5L)) }) test_that("legacy - arrange with doubly nested df-col doesn't infloop", { local_options(dplyr.legacy_locale = TRUE) one <- tibble(a = c(1, 1, 1, 1, 1), b = c(1, 1, 2, 2, 2)) two <- tibble(a = c(1, 1, 1, 1, 1), b = c(2, 1, 1, 2, 2)) col <- tibble(one = one, two = two) df <- tibble(x = col, y = c(1, 1, 1, 1, 0)) expect_identical(arrange(df, x, y), df[c(2, 1, 3, 5, 4),]) }) dplyr/tests/testthat/test-coalesce.R0000644000176200001440000000534114366556340017303 0ustar liggesuserstest_that("non-missing scalar replaces all missing values", { x <- c(NA, 1) expect_equal(coalesce(x, 1), c(1, 1)) }) test_that("coerces to common type", { expect_identical(coalesce(NA, 1), 1) f <- factor("x", levels = c("x", "y")) expect_identical(coalesce(NA, f), f) }) test_that("inputs are recycled to their common size", { expect_identical(coalesce(1, c(2, 3)), c(1, 1)) }) test_that("finds non-missing values in multiple positions", { x1 <- c(1L, NA, NA) x2 <- c(NA, 2L, NA) x3 <- c(NA, NA, 3L) expect_equal(coalesce(x1, x2, x3), 1:3) }) test_that("coalesce() gives meaningful error messages", { expect_snapshot({ (expect_error(coalesce(1:2, 1:3))) (expect_error(coalesce(1:2, letters[1:2]))) }) }) test_that("coalesce() supports one-dimensional arrays (#5557)", { x <- array(1:10) out <- coalesce(x, 0L) expect_identical(out, x) }) test_that("only updates entirely missing matrix rows", { x <- c( 1, NA, NA, NA ) x <- matrix(x, nrow = 2, byrow = TRUE) y <- c( 2, 2, NA, 1 ) y <- matrix(y, nrow = 2, byrow = TRUE) expect <- c( 1, NA, NA, 1 ) expect <- matrix(expect, nrow = 2, byrow = TRUE) expect_identical(coalesce(x, y), expect) }) test_that("only updates entirely missing data frame rows", { x <- tibble(x = c(1, NA), y = c(NA, NA)) y <- tibble(x = c(2, NA), y = c(TRUE, TRUE)) expect <- tibble(x = c(1, NA), y = c(NA, TRUE)) expect_identical(coalesce(x, y), expect) }) test_that("only updates entirely missing rcrd 
observations", { x <- new_rcrd(list(x = c(1, NA), y = c(NA, NA))) y <- new_rcrd(list(x = c(2, NA), y = c(TRUE, TRUE))) expect <- new_rcrd(list(x = c(1, NA), y = c(NA, TRUE))) expect_identical(coalesce(x, y), expect) }) test_that("recycling is done on the values early", { expect_identical(coalesce(1, 1:2), c(1, 1)) }) test_that("`.ptype` overrides the common type (r-lib/funs#64)", { x <- c(1L, NA) expect_identical(coalesce(x, 99, .ptype = x), c(1L, 99L)) }) test_that("`.size` overrides the common size", { x <- 1L expect_snapshot(error = TRUE, { coalesce(x, 1:2, .size = vec_size(x)) }) }) test_that("must have at least one non-`NULL` vector", { expect_snapshot(error = TRUE, { coalesce() }) expect_snapshot(error = TRUE, { coalesce(NULL, NULL) }) }) test_that("`NULL`s are discarded (r-lib/funs#80)", { expect_identical( coalesce(c(1, NA, NA), NULL, c(1, 2, NA), NULL, 3), c(1, 2, 3) ) }) test_that("inputs must be vectors", { expect_snapshot(error = TRUE, { coalesce(1, environment()) }) }) test_that("names in error messages are indexed correctly", { expect_snapshot(error = TRUE, { coalesce(1, "x") }) expect_snapshot(error = TRUE, { coalesce(1, y = "x") }) }) dplyr/tests/testthat/test-nth-value.R0000644000176200001440000001503014406402754017416 0ustar liggesusers# ------------------------------------------------------------------------------ # nth() test_that("nth works with lists and uses `vec_slice2()` to return elements (#6331)", { # We'd like to use `vec_slice()` everywhere, but it breaks too many revdeps # that rely on `nth()` returning list elements x <- list(1, 2, 3:5) expect_equal(nth(x, 1), 1) expect_equal(nth(x, 3), 3:5) }) test_that("nth `default` for lists defaults to `NULL` since it uses `vec_slice2()`", { expect_null(nth(list(1), 2)) expect_null(nth(list(), 1)) }) test_that("nth `default` for lists can be anything", { # Because list elements can be anything x <- list(1, 2) default <- environment() expect_identical(nth(x, 3, default = default), default) default 
<- 1:3 expect_identical(nth(x, 3, default = default), default) }) test_that("nth treats list-of like lists", { x <- list_of(1, 2, c(3, 4)) expect_identical(nth(x, 3), c(3, 4)) expect_identical(nth(x, 4), NULL) # Not particularly strict about `default` here, # even though `list_of()` elements are typed expect_identical(nth(x, 4, default = "x"), "x") }) test_that("nth works with data frames and always returns a single row", { x <- tibble(x = 1:3, y = 4:6) expect_identical(nth(x, 1), tibble(x = 1L, y = 4L)) expect_identical(nth(x, 4), tibble(x = NA_integer_, y = NA_integer_)) expect_identical(nth(x, 4, default = tibble(x = 0, y = 0)), tibble(x = 0L, y = 0L)) }) test_that("nth works with rcrds", { x <- new_rcrd(list(x = 1:3, y = 4:6)) expect_identical(nth(x, 1), vec_slice(x, 1)) expect_identical(nth(x, 4), vec_init(x)) expect_identical(nth(x, 4, default = x[2]), x[2]) }) test_that("drops names, because it uses `vec_slice2()`", { x <- c(a = 1, b = 2) expect_named(nth(x, 2), NULL) }) test_that("negative values index from end", { x <- 1:5 expect_equal(nth(x, -1), 5L) expect_equal(nth(x, -3), 3L) }) test_that("indexing past ends returns default value", { expect_equal(nth(1:4, 5), NA_integer_) expect_equal(nth(1:4, -5), NA_integer_) expect_equal(nth(1:4, -10), NA_integer_) expect_equal(nth(1:4, -10, default = 6L), 6L) }) test_that("gets corner case indexing correct", { expect_identical(nth(1:4, -5), NA_integer_) expect_identical(nth(1:4, -4), 1L) expect_identical(nth(1:4, -3), 2L) expect_identical(nth(1:4, -1), 4L) expect_identical(nth(1:4, 0), NA_integer_) expect_identical(nth(1:4, 1), 1L) expect_identical(nth(1:4, 3), 3L) expect_identical(nth(1:4, 4), 4L) expect_identical(nth(1:4, 5), NA_integer_) }) test_that("`order_by` can be used to alter the order", { expect_identical(nth(1:5, n = 1L, order_by = 5:1), 5L) expect_identical(nth(as.list(1:5), n = 1L, order_by = 5:1), 5L) }) test_that("can use a data frame as `order_by`", { x <- 1:3 order_by <- tibble(a = c(1, 1, 2), b = 
c(2, 1, 0)) expect_identical(nth(x, 1, order_by = order_by), 2L) expect_identical(nth(x, 2, order_by = order_by), 1L) }) test_that("`na_rm` can be used to drop missings before selecting the value (#6242)", { x <- c(NA, 4, 10, NA, 5, NA) expect_identical(nth(x, 1, na_rm = TRUE), 4) expect_identical(nth(x, -1, na_rm = TRUE), 5) expect_identical(nth(x, 3, na_rm = TRUE), 5) }) test_that("`na_rm` removes `NULL` list elements", { x <- list(1:3, NULL, 4, integer(), NULL, NULL) expect_identical(nth(x, 2, na_rm = TRUE), 4) expect_identical(nth(x, -1, na_rm = TRUE), integer()) }) test_that("`na_rm` can generate OOB selections, resulting in `default`", { # Removes some values x <- c(NA, FALSE, NA) expect_identical(nth(x, 2, default = TRUE, na_rm = TRUE), TRUE) # Removes everything x <- c(NA, NA, NA) expect_identical(nth(x, 1, default = TRUE, na_rm = TRUE), TRUE) expect_identical(nth(x, -2, default = TRUE, na_rm = TRUE), TRUE) }) test_that("`na_rm` slices `order_by` as well", { x <- c(NA, 4, 10, NA, 5, NA) o <- c(2, 1, 3, 1, 1, 0) expect_identical(nth(x, 1, order_by = o, na_rm = TRUE), 4) expect_identical(nth(x, -1, order_by = o, na_rm = TRUE), 10) expect_identical(nth(x, 2, order_by = o, na_rm = TRUE), 5) expect_identical(nth(x, 3, order_by = o, na_rm = TRUE), 10) }) test_that("`na_rm` is validated", { expect_snapshot(error = TRUE, { nth(1, 1, na_rm = 1) }) expect_snapshot(error = TRUE, { nth(1, 1, na_rm = c(TRUE, FALSE)) }) }) test_that("`default` must be size 1 (when not used with lists)", { expect_snapshot(error = TRUE, { nth(1L, n = 2L, default = 1:2) }) }) test_that("`default` is cast to the type of `x` (when not used with lists)", { expect_snapshot(error = TRUE, { nth("x", 2, default = 2) }) }) test_that("`n` is validated (#5466)", { expect_snapshot(error = TRUE, { nth(1:10, n = "x") }) expect_snapshot(error = TRUE, { nth(1:10, n = 1:2) }) expect_snapshot(error = TRUE, { nth(1:10, n = NA_integer_) }) }) test_that("`x` must be a vector", { expect_snapshot(error = TRUE, { 
nth(environment(), 1L) }) }) test_that("`order_by` must be the same size as `x`", { expect_snapshot(error = TRUE, { nth(1:5, n = 1L, order_by = 1:2) }) # Ensure that this is checked before `default` is early returned expect_snapshot(error = TRUE, { nth(1:5, n = 6L, order_by = 1:2) }) }) # ------------------------------------------------------------------------------ # first() test_that("`first()` selects the first value", { expect_identical(first(1:5), 1L) }) test_that("`first()` uses default value for 0 length vectors", { expect_equal(first(logical()), NA) expect_equal(first(integer()), NA_integer_) expect_equal(first(numeric()), NA_real_) expect_equal(first(character()), NA_character_) }) test_that("`first()` uses `NULL` default for 0 length lists", { expect_identical(first(list()), NULL) }) test_that("`first()` uses default value for 0 length augmented vectors", { fc <- factor("a")[0] dt <- Sys.Date()[0] tm <- Sys.time()[0] expect_equal(first(fc), vec_init(fc)) expect_equal(first(dt), vec_init(dt)) expect_equal(first(tm), vec_init(tm)) }) test_that("`first()` returns list elements", { expect_identical(first(list(2:3, 4:5)), 2:3) }) test_that("`first()` respects `na_rm`", { x <- c(NA, NA, 2, 3) expect_identical(first(x, na_rm = TRUE), 2) }) # ------------------------------------------------------------------------------ # last() test_that("`last()` selects the last value", { expect_identical(last(1:5), 5L) }) test_that("`last()` returns list elements", { expect_identical(last(list(2:3, 4:5)), 4:5) }) test_that("`last()` respects `na_rm`", { x <- c(2, 3, NA, NA) expect_identical(last(x, na_rm = TRUE), 3) }) dplyr/tests/testthat/test-deprec-lazyeval.R0000644000176200001440000002334114472225345020610 0ustar liggesuserstest_that("can select negatively (#2519)", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(select_(mtcars, ~ -cyl), mtcars[-2]) }) test_that("select yields proper names", { withr::local_options(lifecycle_verbosity = "quiet") 
expect_identical(names(select_(mtcars, ~ cyl:hp)), c("cyl", "disp", "hp")) }) test_that("lazydots are named and arrange() doesn't fail (it assumes empty names)", { withr::local_options(lifecycle_verbosity = "quiet") dots <- compat_lazy_dots(list(), env(), "cyl") expect_identical(names(dots), "") expect_identical(arrange_(mtcars, "cyl"), arrange(mtcars, cyl)) }) test_that("mutate_each_() and summarise_each_() handle lazydots", { withr::local_options(lifecycle_verbosity = "quiet") cyl_chr <- mutate_each_(mtcars, list(as.character), "cyl")$cyl expect_identical(cyl_chr, as.character(mtcars$cyl)) cyl_mean <- summarise_each_(mtcars, list(mean), "cyl")$cyl expect_equal(cyl_mean, mean(mtcars$cyl)) }) test_that("mutate_each() and mutate_each_() are deprecated (#6869)", { df <- tibble(x = 1:2, y = 3:4) expect_snapshot({ mutate_each(df, list(~ .x + 1L)) }) expect_snapshot({ mutate_each_(df, list(~ .x + 1L), c("x", "y")) }) }) test_that("summarise_each() and summarise_each_() are deprecated (#6869)", { df <- tibble(x = 1:2, y = 3:4) expect_snapshot({ summarise_each(df, list(mean)) }) expect_snapshot({ summarise_each_(df, list(mean), c("x", "y")) }) }) test_that("select_vars_() handles lazydots", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(select_vars_(letters, c("a", "b")), set_names(c("a", "b"))) expect_identical( select_vars_(letters, c("a", "b"), include = "c"), set_names(c("c", "a", "b")) ) expect_identical( select_vars_(letters, c("a", "b"), exclude = "b"), set_names(c("a")) ) }) df <- tibble( a = c(1:3, 2:3), b = letters[c(1:4, 4L)] ) test_that("arrange_ works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( arrange_(df, ~ -a), arrange(df, -a) ) expect_equal( arrange_(df, .dots = list(quote(-a))), arrange(df, -a) ) expect_equal( arrange_(df, .dots = list(~ -a)), arrange(df, -a) ) }) test_that("count_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( count_(df, ~ b), count(df, b) ) expect_equal( 
count_(df, ~ b, wt = quote(a)), count(df, b, wt = a) ) wt <- 1:4 expect_identical( count_(df, "b", "wt"), count(df, b, wt = wt) ) expect_identical( add_count(df, b), add_count_(df, ~ b) ) }) test_that("distinct_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( distinct_(df, ~ a), distinct(df, a) ) expect_equal( distinct_(df, .dots = list(quote(a))), distinct(df, a) ) expect_equal( distinct_(df, .dots = list(~ a)), distinct(df, a) ) expect_equal( distinct_(df %>% group_by(b), ~ a, .dots = NULL), distinct(df %>% group_by(b), a) ) expect_equal( distinct_(df %>% group_by(b), .dots = list(quote(a))), distinct(df %>% group_by(b), a) ) expect_equal( distinct_(df %>% group_by(b), .dots = list(~ a)), distinct(df %>% group_by(b), a) ) }) test_that("do_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( do_(df, ~ tibble(-.$a)), do(df, tibble(-.$a)) ) expect_equal( do_(df, .dots = list(quote(dplyr::tibble(-.$a)))), do(df, tibble(-.$a)) ) expect_equal( do_(df, .dots = list(~ dplyr::tibble(-.$a))), do(df, tibble(-.$a)) ) foo <- "foobar" expect_identical( do_(df, .dots = "tibble(foo)"), do(df, tibble(foo)) ) expect_equal( do_(df %>% group_by(b), ~ tibble(-.$a)), do(df %>% group_by(b), tibble(-.$a)) ) expect_equal( do_(df %>% group_by(b), .dots = list(quote(dplyr::tibble(-.$a)))), do(df %>% group_by(b), tibble(-.$a)) ) expect_equal( do_(df %>% group_by(b), .dots = list(~ dplyr::tibble(-.$a))), do(df %>% group_by(b), tibble(-.$a)) ) }) test_that("filter_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( filter_(df, ~ a > 1), filter(df, a > 1) ) expect_equal( filter_(df, .dots = list(quote(a > 1))), filter(df, a > 1) ) cnd <- rep(TRUE, 5) expect_identical( filter_(df, .dots = "cnd"), filter(df, cnd) ) }) test_that("group_by_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( group_by_(df, ~ a), group_by(df, a) ) expect_equal( group_by_(df, ~ -a), group_by(df, -a) ) 
expect_equal( group_by_(df, .dots = "a"), group_by(df, a) ) expect_equal( group_by_(df, .dots = list(quote(-a))), group_by(df, -a) ) expect_equal( group_by_(df, .dots = list(~ -a)), group_by(df, -a) ) }) test_that("mutate_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( mutate_(df, c = ~ -a), mutate(df, c = -a) ) expect_equal( mutate_(df, .dots = list(c = quote(-a))), mutate(df, c = -a) ) expect_equal( mutate_(df, .dots = list(c = ~ -a)), mutate(df, c = -a) ) expect_identical( mutate_(df, ~ -a), mutate(df, -a) ) foo <- "foobar" expect_identical( mutate_(df, .dots = "foo"), mutate(df, foo) ) }) test_that("rename_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( rename_(df, c = ~ a), rename(df, c = a) ) expect_equal( rename_(df, .dots = list(c = quote(a))), rename(df, c = a) ) expect_equal( rename_(df, .dots = list(c = ~ a)), rename(df, c = a) ) }) test_that("select_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( select_(df, ~ a), select(df, a) ) expect_equal( select_(df, ~ -a), select(df, -a) ) expect_equal( select_(df, .dots = "a"), select(df, a) ) expect_equal( select_(df, .dots = list(quote(-a))), select(df, -a) ) expect_equal( select_(df, .dots = list(~ -a)), select(df, -a) ) }) test_that("slice_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( slice_(df, ~ 2:n()), slice(df, 2:n()) ) expect_equal( slice_(df, .dots = list(quote(2:n()))), slice(df, 2:n()) ) expect_equal( slice_(df, .dots = list(~ 2:n())), slice(df, 2:n()) ) pos <- 3 expect_identical( slice_(df, .dots = "pos:n()"), slice(df, pos:n()) ) }) test_that("summarise_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( summarise_(df, a = ~ mean(a)), summarise(df, a = mean(a)) ) expect_equal( summarise_(df, .dots = list(a = quote(mean(a)))), summarise(df, a = mean(a)) ) expect_equal( summarise_(df, .dots = list(a = ~ mean(a))), summarise(df, a = mean(a)) ) my_mean 
<- mean expect_identical( summarise_(df, .dots = c(a = "my_mean(a)")), summarise(df, a = my_mean(a)) ) expect_equal( summarise_(df %>% group_by(b), a = ~ mean(a)), summarise(df %>% group_by(b), a = mean(a)) ) expect_equal( summarise_(df %>% group_by(b), .dots = list(a = quote(mean(a)))), summarise(df %>% group_by(b), a = mean(a)) ) expect_equal( summarise_(df %>% group_by(b), .dots = list(a = ~ mean(a))), summarise(df %>% group_by(b), a = mean(a)) ) }) test_that("summarize_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( summarize_(df, a = ~ mean(a)), summarize(df, a = mean(a)) ) expect_equal( summarize_(df, .dots = list(a = quote(mean(a)))), summarize(df, a = mean(a)) ) expect_equal( summarize_(df, .dots = list(a = ~ mean(a))), summarize(df, a = mean(a)) ) expect_equal( summarize_(df %>% group_by(b), a = ~ mean(a)), summarize(df %>% group_by(b), a = mean(a)) ) expect_equal( summarize_(df %>% group_by(b), .dots = list(a = quote(mean(a)))), summarize(df %>% group_by(b), a = mean(a)) ) expect_equal( summarize_(df %>% group_by(b), .dots = list(a = ~ mean(a))), summarize(df %>% group_by(b), a = mean(a)) ) }) test_that("transmute_() works", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( transmute_(df, c = ~ -a), transmute(df, c = -a) ) expect_equal( transmute_(df, .dots = list(c = quote(-a))), transmute(df, c = -a) ) expect_equal( transmute_(df, .dots = list(c = ~ -a)), transmute(df, c = -a) ) foo <- "foobar" expect_identical( transmute_(df, .dots = "foo"), transmute(df, foo) ) }) test_that("_each() and _all() families agree", { withr::local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = 1:3, y = 1:3) expect_equal(summarise_each(df, list(mean)), summarise_all(df, mean)) expect_equal(summarise_each(df, list(mean), x), summarise_at(df, vars(x), mean)) expect_equal(summarise_each(df, list(mean = mean), x), summarise_at(df, vars(x), list(mean = mean))) expect_equal(summarise_each(df, list(mean = mean), x:y), 
summarise_at(df, vars(x:y), list(mean = mean))) expect_equal(summarise_each(df, list(mean), x:y), summarise_at(df, vars(x:y), mean)) expect_equal(summarise_each(df, list(mean), z = y), summarise_at(df, vars(z = y), mean)) expect_equal(mutate_each(df, list(mean)), mutate_all(df, mean)) expect_equal(mutate_each(df, list(mean), x), mutate_at(df, vars(x), mean)) expect_equal(mutate_each(df, list(mean = mean), x), mutate_at(df, vars(x), list(mean = mean))) expect_equal(mutate_each(df, list(mean = mean), x:y), mutate_at(df, vars(x:y), list(mean = mean))) expect_equal(mutate_each(df, list(mean), x:y), mutate_at(df, vars(x:y), mean)) expect_equal(mutate_each(df, list(mean), z = y), mutate_at(df, vars(z = y), mean)) }) dplyr/tests/testthat/test-generics.R0000644000176200001440000001252614525503021017310 0ustar liggesuserstest_that("row_slice recomputes groups", { gf <- group_by(data.frame(g = c(1, 1, 2, 2, 3, 3)), g) out <- dplyr_row_slice(gf, c(1L, 3L, 5L)) expect_equal(group_data(out)$.rows, list_of(1L, 2L, 3L)) out <- dplyr_row_slice(gf, c(4L, 3L)) expect_equal(group_data(out)$.rows, list_of(c(1L, 2L))) }) test_that("row_slice preserves empty groups if requested", { gf <- group_by(data.frame(g = c(1, 1, 2, 2, 3, 3)), g, .drop = FALSE) out <- dplyr_row_slice(gf, c(3L, 4L)) expect_equal(group_data(out)$.rows, list_of(integer(), c(1L, 2L), integer())) }) # dplyr_col_modify -------------------------------------------------------- test_that("empty cols returns input", { df <- data.frame(x = 1) expect_equal(dplyr_col_modify(df, list()), df) }) test_that("applies tidyverse recycling rules", { expect_equal( dplyr_col_modify(data.frame(x = 1:2), list(y = 1)), data.frame(x = 1:2, y = c(1, 1)) ) expect_equal( dplyr_col_modify(data.frame(x = integer()), list(y = 1)), data.frame(x = integer(), y = integer()) ) expect_error( dplyr_col_modify(data.frame(x = 1:4), list(y = 1:2)), class = "vctrs_error_recycle_incompatible_size" ) }) test_that("can add, remove, and replace columns", { df 
<- data.frame(x = 1, y = 2) expect_equal(dplyr_col_modify(df, list(y = NULL)), data.frame(x = 1)) expect_equal(dplyr_col_modify(df, list(y = 3)), data.frame(x = 1, y = 3)) expect_equal(dplyr_col_modify(df, list(z = 3)), data.frame(x = 1, y = 2, z = 3)) }) test_that("doesn't expand row names", { df <- data.frame(x = 1:10) out <- dplyr_col_modify(df, list(y = 1)) expect_equal(.row_names_info(out, 1), -10) }) test_that("preserves existing row names", { df <- data.frame(x = c(1, 2), row.names = c("a", "b")) out <- dplyr_col_modify(df, list(y = 1)) expect_equal(row.names(df), c("a", "b")) }) test_that("reconstruct method gets a data frame", { seen_df <- NULL local_methods( dplyr_reconstruct.dplyr_foobar = function(data, template) { if (is.data.frame(data)) { seen_df <<- TRUE } NextMethod() } ) df <- foobar(data.frame(x = 1)) seen_df <- FALSE dplyr_col_modify(df, list(y = 2)) expect_true(seen_df) seen_df <- FALSE dplyr_row_slice(df, 1) expect_true(seen_df) }) # dplyr_reconstruct ------------------------------------------------------- test_that("classes are restored", { expect_identical( dplyr_reconstruct(tibble(), data.frame()), data.frame() ) expect_identical( dplyr_reconstruct(tibble(), tibble()), tibble() ) expect_identical( dplyr_reconstruct(tibble(), new_data_frame(class = "foo")), new_data_frame(class = "foo") ) }) test_that("attributes of `template` are kept", { expect_identical( dplyr_reconstruct(new_tibble(list(), nrow = 1), new_data_frame(foo = 1)), new_data_frame(n = 1L, foo = 1) ) }) test_that("compact row names are retained", { data <- vec_rbind(tibble(a = 1), tibble(a = 2)) template <- tibble() x <- dplyr_reconstruct(data, template) expect <- tibble(a = c(1, 2)) expect_identical(x, expect) # Explicitly ensure internal row name structure is identical expect_identical( .row_names_info(x, type = 0L), .row_names_info(expect, type = 0L) ) }) test_that("dplyr_reconstruct() strips attributes before dispatch", { local_methods( dplyr_reconstruct.dplyr_foobar = 
function(data, template) { out <<- data } ) df <- foobar(data.frame(x = 1), foo = "bar") out <- NULL dplyr_reconstruct(df, df) expect_identical(out, data.frame(x = 1)) df <- foobar(data.frame(x = 1, row.names = "a"), foo = "bar") out <- NULL dplyr_reconstruct(df, df) expect_identical(out, data.frame(x = 1, row.names = "a")) }) test_that("`dplyr_reconstruct()` retains attribute ordering of `template`", { df <- vctrs::data_frame(x = 1) expect_identical( attributes(dplyr_reconstruct(df, df)), attributes(df) ) }) test_that("`dplyr_reconstruct()` doesn't modify the original `data` in place", { data <- new_data_frame(list(x = 1), foo = "bar") template <- vctrs::data_frame(x = 1) out <- dplyr_reconstruct(data, template) expect_null(attr(out, "foo")) expect_identical(attr(data, "foo"), "bar") }) test_that("`dplyr_reconstruct()`, which gets and sets attributes, doesn't touch `row.names` (#6525)", { skip_if_no_lazy_character() dplyr_attributes <- function(x) { .Call(ffi_test_dplyr_attributes, x) } dplyr_set_attributes <- function(x, attributes) { .Call(ffi_test_dplyr_set_attributes, x, attributes) } df <- vctrs::data_frame(x = 1) attributes <- attributes(df) attributes$row.names <- new_lazy_character(function() "a") attributes <- as.pairlist(attributes) df_with_lazy_row_names <- dplyr_set_attributes(df, attributes) # Ensure `data` row names aren't materialized x <- dplyr_reconstruct(df_with_lazy_row_names, df) attributes <- dplyr_attributes(df_with_lazy_row_names) expect_false(lazy_character_is_materialized(attributes$row.names)) # `data` row names should also propagate into the result unmaterialized attributes <- dplyr_attributes(x) expect_false(lazy_character_is_materialized(attributes$row.names)) # Ensure `template` row names aren't materialized x <- dplyr_reconstruct(df, df_with_lazy_row_names) attributes <- dplyr_attributes(df_with_lazy_row_names) expect_false(lazy_character_is_materialized(attributes$row.names)) }) 
dplyr/tests/testthat/test-pull.R0000644000176200001440000000124414266276767016512 0ustar liggesuserstest_that("default extracts last var from data frame", { df <- tibble(x = 1:10, y = 1:10) expect_equal(pull(df), 1:10) }) test_that("can extract by name, or positive/negative position", { x <- 1:10 df <- tibble(x = x, y = runif(10)) expect_equal(pull(df, x), x) expect_equal(pull(df, 1L), x) expect_equal(pull(df, 1), x) expect_equal(pull(df, -2), x) expect_equal(pull(df, -2L), x) }) test_that("can extract named vectors", { x <- 1:10 y <- letters[x] df <- tibble(x = x, y = y) xn <- set_names(x, y) expect_equal(pull(df, x), x) expect_equal(pull(df, x, y), xn) expect_equal(pull(df, 1, 2), xn) expect_equal(names(pull(df, x, y)), y) }) dplyr/tests/testthat/test-sets.R0000644000176200001440000001024514472225345016476 0ustar liggesuserstest_that("x used as basis of output (#3839)", { df1 <- tibble(x = 1:4, y = 1) df2 <- tibble(y = 1, x = c(4, 2)) expect_equal(intersect(df1, df2), tibble(x = c(2, 4), y = 1)) expect_equal(union(df1, df2), tibble(x = 1:4, y = 1)) expect_equal(union_all(df1, df2), tibble(x = c(1:4, 4, 2), y = 1)) expect_equal(setdiff(df1, df2), tibble(x = c(1, 3), y = 1)) expect_equal(symdiff(df1, df2), tibble(x = c(1, 3), y = 1)) }) test_that("set operations (apart from union_all) remove duplicates", { df1 <- tibble(x = c(1, 1, 2)) df2 <- tibble(x = 2) expect_equal(intersect(df1, df2), tibble(x = 2)) expect_equal(union(df1, df2), tibble(x = c(1, 2))) expect_equal(union_all(df1, df2), tibble(x = c(1, 1, 2, 2))) expect_equal(setdiff(df1, df2), tibble(x = 1)) expect_equal(symdiff(df1, df2), tibble(x = 1)) }) test_that("standard coercion rules are used (#799)", { df1 <- tibble(x = 1:2, y = c(1, 1)) df2 <- tibble(x = 1:2, y = 1:2) expect_equal(nrow(intersect(df1, df2)), 1) expect_equal(nrow(union(df1, df2)), 3) expect_equal(nrow(union_all(df1, df2)), 4) expect_equal(nrow(setdiff(df1, df2)), 1) expect_equal(nrow(symdiff(df1, df2)), 2) }) test_that("grouping metadata 
is reconstructed (#3587)", { df1 <- tibble(x = 1:4, g = rep(1:2, each = 2)) %>% group_by(g) df2 <- tibble(x = 3:6, g = rep(2:3, each = 2)) expect_equal(group_vars(intersect(df1, df2)), "g") expect_equal(group_vars(union(df1, df2)), "g") expect_equal(group_vars(union_all(df1, df2)), "g") expect_equal(group_vars(setdiff(df1, df2)), "g") expect_equal(group_vars(symdiff(df1, df2)), "g") }) test_that("also work with vectors", { expect_equal(intersect(1:3, 3:4), 3) expect_equal(union(1:3, 3:4), 1:4) expect_equal(union_all(1:3, 3:4), c(1:3, 3:4)) expect_equal(setdiff(1:3, 3:4), 1:2) expect_equal(symdiff(1:3, 3:4), c(1, 2, 4)) # removes duplicates expect_equal(symdiff(c(1, 1, 2), c(2, 2, 3)), c(1, 3)) }) test_that("extra arguments in ... error (#5891)", { df1 <- tibble(var = 1:3) df2 <- tibble(var = 2:4) expect_snapshot(error = TRUE, { intersect(df1, df2, z = 3) union(df1, df2, z = 3) union_all(df1, df2, z = 3) setdiff(df1, df2, z = 3) symdiff(df1, df2, z = 3) }) }) test_that("incompatible data frames error (#903)", { df1 <- tibble(x = 1) df2 <- tibble(x = 1, y = 1) expect_snapshot(error = TRUE, { intersect(df1, df2) union(df1, df2) union_all(df1, df2) setdiff(df1, df2) symdiff(df1, df2) }) }) test_that("is_compatible generates useful messages for different cases", { expect_snapshot({ cat(is_compatible(tibble(x = 1), 1)) cat(is_compatible(tibble(x = 1), tibble(x = 1, y = 2))) cat(is_compatible(tibble(x = 1, y = 1), tibble(y = 1, x = 1), ignore_col_order = FALSE)) cat(is_compatible(tibble(x = 1), tibble(y = 1))) cat(is_compatible(tibble(x = 1), tibble(x = 1L), convert = FALSE)) cat(is_compatible(tibble(x = 1), tibble(x = "a"))) }) }) # setequal ---------------------------------------------------------------- test_that("setequal ignores column and row order", { df1 <- tibble(x = 1:2, y = 3:4) df2 <- df1[2:1, 2:1] expect_true(setequal(df1, df2)) expect_true(setequal(df1, df2)) }) test_that("setequal ignores duplicated rows (#6057)", { df1 <- tibble(x = 1) df2 <- df1[c(1, 1, 
1), ] expect_true(setequal(df1, df2)) expect_true(setequal(df2, df1)) }) test_that("setequal uses coercion rules (#6114)", { df1 <- tibble(x = 1) df2 <- tibble(x = 1L) expect_true(setequal(df1, df2)) expect_true(setequal(df2, df1)) }) test_that("setequal tibbles must have same rows and columns", { # Different rows are the definition of not equal expect_false(setequal(tibble(x = 1:2), tibble(x = 2:3))) # Different or incompatible columns are an error, like the other set ops (#6786) expect_snapshot(error = TRUE, { setequal(tibble(x = 1:2), tibble(y = 1:2)) }) expect_snapshot(error = TRUE, { setequal(tibble(x = 1:2), tibble(x = c("a", "b"))) }) }) test_that("setequal checks y is a data frame", { expect_snapshot(setequal(mtcars, 1), error = TRUE) }) test_that("setequal checks for extra arguments", { expect_snapshot(setequal(mtcars, mtcars, z = 2), error = TRUE) }) dplyr/tests/testthat/test-group-split.R0000644000176200001440000001005014366556340020003 0ustar liggesuserstest_that("group_split() keeps the grouping variables by default", { tbl <- tibble(x = 1:4, g = factor(rep(c("a", "b"), each = 2))) res <- group_split(tbl, g) expect_equal(res, list_of(tbl[1:2,], tbl[3:4,])) expect_identical(res, list_of(tbl[1:2,], tbl[3:4,])) expect_s3_class(res, "vctrs_list_of") expect_identical(attr(res, "ptype"), tibble(x = integer(), g = factor(levels = c("a", "b")))) }) test_that("group_split() can discard the grouping variables with .keep = FALSE", { tbl <- tibble(x = 1:4, g = factor(rep(c("a", "b"), each = 2))) res <- group_split(tbl, g, .keep = FALSE) expect_identical(res, list_of(tbl[1:2, 1, drop = FALSE], tbl[3:4,1, drop = FALSE])) expect_s3_class(res, "vctrs_list_of") expect_identical(attr(res, "ptype"), tibble(x = integer())) }) test_that("group_split() respects empty groups", { tbl <- tibble(x = 1:4, g = factor(rep(c("a", "b"), each = 2), levels = c("a", "b", "c"))) res <- group_split(tbl, g) expect_identical(res, list_of(tbl[1:2,], tbl[3:4,])) expect_s3_class(res, 
"vctrs_list_of") expect_identical(attr(res, "ptype"), tibble(x = integer(), g = factor(levels = c("a", "b", "c")))) res <- group_split(tbl, g, .drop = FALSE) expect_identical(res, list_of(tbl[1:2,], tbl[3:4,], tbl[integer(), ])) }) test_that("group_split.grouped_df() warns about ...", { expect_warning(group_split(group_by(mtcars, cyl), cyl)) }) test_that("group_split.rowwise_df() warns about ...", { expect_warning(group_split(rowwise(mtcars), cyl)) }) test_that("group_split.grouped_df() works", { iris <- as_tibble(iris) expect_identical( iris %>% group_by(Species) %>% group_split(), iris %>% group_split(Species) ) }) test_that("group_split / bind_rows round trip", { setosa <- iris %>% filter(Species == "setosa") %>% as_tibble() chunks <- setosa %>% group_split(Species) expect_identical(length(chunks), 1L) expect_identical(bind_rows(chunks), setosa) chunks <- setosa %>% group_split(Species, .drop = FALSE) expect_identical(length(chunks), 3L) expect_identical(bind_rows(chunks), setosa) }) test_that("group_split() works if no grouping column", { expect_identical(group_split(iris), list_of(as_tibble(iris))) }) test_that("group_split(keep=FALSE) does not try to remove virtual grouping columns (#4045)", { iris3 <- as_tibble(iris[1:3,]) rows <- list(c(1L, 3L, 2L), c(3L, 2L, 3L)) df <- new_grouped_df( iris3, groups = tibble(.bootstrap = 1:2, .rows := rows) ) res <- group_split(df, .keep = FALSE) expect_identical( res, list_of(iris3[rows[[1L]],], iris3[rows[[2L]],]) ) }) test_that("group_split() respects .drop", { chunks <- tibble(f = factor("b", levels = c("a", "b", "c"))) %>% group_split(f, .drop = TRUE) expect_identical(length(chunks), 1L) }) test_that("group_split() on a bare data frame returns bare tibbles", { df <- data.frame(x = 1:2) tib <- as_tibble(df) expect <- list_of(vec_slice(tib, 1), vec_slice(tib, 2)) expect_identical(group_split(df, x), expect) }) test_that("group_split() on a grouped df returns a list of tibbles", { df <- tibble(x = 1:2) gdf <- group_by(df, 
x) expect <- list_of(vec_slice(df, 1), vec_slice(df, 2)) expect_identical(group_split(gdf), expect) }) test_that("group_split() on a rowwise df returns a list of tibbles", { df <- tibble(x = 1:2) rdf <- rowwise(df) expect <- list_of(vec_slice(df, 1), vec_slice(df, 2)) expect_identical(group_split(rdf), expect) }) test_that("group_split() works with subclasses implementing group_by() / ungroup()", { local_foo_df() df <- list(x = c(1, 2, 2)) df <- new_tibble(df, nrow = 3L, class = "foo_df") expect <- list_of(vec_slice(df, 1), vec_slice(df, 2:3)) expect_identical(group_split(df, x), expect) }) test_that("group_split() internally uses dplyr_row_slice()", { local_foo_df() df <- list(x = c(1, 2, 2)) df <- new_tibble(df, nrow = 3L, class = "foo_df") local_methods( dplyr_row_slice.foo_df = function(x, i, ...) { abort(class = "dplyr_row_slice_called") } ) expect_error(group_split(df, x), class = "dplyr_row_slice_called") }) dplyr/tests/testthat/test-bind-cols.R0000644000176200001440000000745014406402754017374 0ustar liggesuserstest_that("bind_cols() uses shallow copies", { skip_if_not_installed("lobstr") df1 <- data.frame( int = 1:10, num = rnorm(10), cha = letters[1:10], stringsAsFactors = FALSE ) df2 <- data.frame( log = sample(c(T, F), 10, replace = TRUE), dat = seq.Date(Sys.Date(), length.out = 10, by = "day"), tim = seq(Sys.time(), length.out = 10, by = "1 hour") ) df <- bind_cols(df1, df2) expect_equal(lobstr::obj_addrs(df1), lobstr::obj_addrs(df[names(df1)])) expect_equal(lobstr::obj_addrs(df2), lobstr::obj_addrs(df[names(df2)])) }) test_that("bind_cols() handles lists (#1104)", { exp <- tibble(x = 1, y = "a", z = 2) l1 <- list(x = 1, y = "a") l2 <- list(z = 2) expect_identical(bind_cols(l1, l2), exp) expect_identical(bind_cols(list(l1, l2)), exp) }) test_that("bind_cols() handles empty argument list (#1963)", { expect_equal(bind_cols(), tibble()) }) test_that("bind_cols() handles all-NULL values (#2303)", { expect_identical(bind_cols(list(a = NULL, b = NULL)), 
tibble()) expect_identical(bind_cols(NULL), tibble()) }) test_that("bind_cols() repairs names", { df <- tibble(a = 1, b = 2) expect_snapshot(bound <- bind_cols(df, df)) expect_message( repaired <- as_tibble( data.frame(a = 1, b = 2, a = 1, b = 2, check.names = FALSE), .name_repair = "unique" ), "New names" ) expect_identical(bound, repaired) }) test_that("bind_cols() unpacks tibbles", { expect_equal( bind_cols(list(y = tibble(x = 1:2))), tibble(x = 1:2) ) expect_equal( bind_cols(list(y = tibble(x = 1:2), z = tibble(y = 1:2))), tibble(x = 1:2, y = 1:2) ) }) test_that("bind_cols() honours .name_repair=", { expect_message(res <- bind_cols( data.frame(a = 1), data.frame(a = 2) )) expect_equal(res, data.frame(a...1 = 1, a...2 = 2)) expect_error(bind_cols(.name_repair = "check_unique", data.frame(a = 1), data.frame(a = 2) )) }) test_that("bind_cols() accepts NULL (#1148)", { df1 <- tibble(a = 1:10, b = 1:10) df2 <- tibble(c = 1:10, d = 1:10) res1 <- bind_cols(df1, df2) res2 <- bind_cols(NULL, df1, df2) res3 <- bind_cols(df1, NULL, df2) res4 <- bind_cols(df1, df2, NULL) expect_identical(res1, res2) expect_identical(res1, res3) expect_identical(res1, res4) }) test_that("bind_cols() infers classes from first result (#1692)", { d1 <- data.frame(a = 1:10, b = rep(1:2, each = 5)) d2 <- tibble(c = 1:10, d = rep(1:2, each = 5)) d3 <- group_by(d2, d) d4 <- rowwise(d2) d5 <- list(c = 1:10, d = rep(1:2, each = 5)) suppressMessages({ expect_equal(class(bind_cols(d1, d1)), "data.frame") expect_equal(class(bind_cols(d2, d1)), c("tbl_df", "tbl", "data.frame")) }) res3 <- bind_cols(d3, d1) expect_equal(class(res3), c("grouped_df", "tbl_df", "tbl", "data.frame")) expect_equal(map_int(group_rows(res3), length), c(5, 5)) expect_equal(class(bind_cols(d4, d1)), c("rowwise_df", "tbl_df", "tbl", "data.frame")) expect_equal(class(bind_cols(d5, d1)), "data.frame") }) test_that("accepts named columns", { expect_identical(bind_cols(a = 1:2, b = 3:4), tibble(a = 1:2, b = 3:4)) }) test_that("ignores 
NULL values", { expect_identical(bind_cols(a = 1, NULL, b = 2, NULL), tibble(a = 1, b = 2)) }) test_that("bind_cols() handles unnamed list with name repair (#3402)", { expect_snapshot(df <- bind_cols(list(1, 2))) expect_identical(df, bind_cols(list(...1 = 1, ...2 = 2))) }) test_that("bind_cols() doesn't squash record types", { df <- data.frame(x = 1) posixlt <- as.POSIXlt(as.Date("1970-01-01")) expect_identical( bind_cols(df, y = posixlt), new_data_frame(list(x = 1, y = posixlt)) ) }) test_that("bind_cols() gives informative errors", { expect_snapshot({ "# incompatible size" (expect_error(bind_cols(a = 1:2, mtcars))) (expect_error(bind_cols(mtcars, a = 1:3))) }) }) dplyr/tests/testthat/test-n-distinct.R0000644000176200001440000000262214366556340017600 0ustar liggesuserstest_that("n_distinct() counts empty inputs", { expect_equal(n_distinct(NULL), 0) expect_equal(n_distinct(data.frame()), 0) }) test_that("n_distinct() counts unique values in simple vectors", { expect_equal(n_distinct(c(TRUE, FALSE, NA)), 3) expect_equal(n_distinct(c(1, 2, NA)), 3) expect_equal(n_distinct(c(1L, 2L, NA)), 3) expect_equal(n_distinct(c("x", "y", NA)), 3) }) test_that("n_distinct() counts unique combinations", { expect_equal(n_distinct(c(1, 1, 1), c(2, 2, 2)), 1) expect_equal(n_distinct(c(1, 1, 2), c(1, 2, 2)), 3) }) test_that("n_distinct() handles data frames", { expect_equal(n_distinct(data.frame(c(1, 1, 1), c(2, 2, 2))), 1) expect_equal(n_distinct(data.frame(c(1, 1, 2), c(1, 2, 2))), 3) }) test_that("n_distinct() can drop missing values", { expect_equal(n_distinct(NA, na.rm = TRUE), 0) expect_equal(n_distinct(c(NA, 0), na.rm = TRUE), 1) expect_equal(n_distinct(c(NA, 0), c(0, NA), na.rm = TRUE), 0) expect_equal(n_distinct(c(NA, 0), c(0, 0), na.rm = TRUE), 1) # check tibbles unpacked correctly expect_equal(n_distinct(1, tibble(x = 2, y = NA), na.rm = TRUE), 0) }) test_that("n_distinct() follows recycling rules", { expect_equal(n_distinct(double(), 1), 0) expect_equal(n_distinct(1:2, 1), 
2) }) test_that("n_distinct() generates useful errors", { expect_snapshot(error = TRUE, { n_distinct() n_distinct(x = 1:4) n_distinct(mean) }) }) dplyr/tests/testthat/test-rank.R0000644000176200001440000000653214366556340016463 0ustar liggesusers# ranking functions ------------------------------------------------------- test_that("ranking empty vector returns empty vector (#762)", { x <- numeric() expect_equal(row_number(x), numeric()) expect_equal(min_rank(x), numeric()) expect_equal(dense_rank(x), numeric()) expect_equal(percent_rank(x), numeric()) expect_equal(cume_dist(x), numeric()) expect_equal(ntile(x, 1), numeric()) }) test_that("rank functions deal pass NA (and NaN) through (#774, #1132)", { x <- c(1, 2, NA, 1, 0, NaN) expect_equal(percent_rank(x), c(1 / 3, 1, NA, 1 / 3, 0, NA)) expect_equal(min_rank(x), c(2L, 4L, NA, 2L, 1L, NA)) expect_equal(dense_rank(x), c(2L, 3L, NA, 2L, 1L, NA)) expect_equal(cume_dist(x), c(.75, 1, NA, .75, .25, NA)) expect_equal(row_number(x), c(2L, 4L, NA, 3L, 1L, NA)) }) test_that("ranking functions can handle data frames", { # Explicitly testing partially/fully incomplete rows df <- tibble( year = c(2020, 2020, 2021, 2020, 2020, NA), month = c(3, 2, 1, 2, NA, NA) ) expect_identical(row_number(df), c(3L, 1L, 4L, 2L, NA, NA)) expect_identical(min_rank(df), c(3L, 1L, 4L, 1L, NA, NA)) expect_identical(dense_rank(df), c(2L, 1L, 3L, 1L, NA, NA)) expect_identical(percent_rank(df), c(2/3, 0/3, 3/3, 0/3, NA, NA)) expect_identical(cume_dist(df), c(3/4, 2/4, 4/4, 2/4, NA, NA)) expect_identical(ntile(df, 2), c(2L, 1L, 2L, 1L, NA, NA)) expect_identical(ntile(df, 4), c(3L, 1L, 4L, 2L, NA, NA)) }) # row_number() -------------------------------------------------------------- test_that("zero-arg row_number() works in mutate", { n <- c(1, 5, 2, 9) df <- tibble(id = rep(letters[1:4], n)) expect_equal(mutate(df, rn = row_number())$rn, 1:sum(n)) gf <- group_by(df, id) expect_equal(mutate(gf, rn = row_number())$rn, sequence(n)) }) # ntile() 
------------------------------------------------------------------- test_that("ntile puts biggest groups first (#4995) ", { expect_equal(ntile(1, 5), 1) expect_equal(ntile(1:2, 5), 1:2) expect_equal(ntile(1:3, 5), 1:3) expect_equal(ntile(1:4, 5), 1:4) expect_equal(ntile(1:5, 5), 1:5) expect_equal(ntile(1:6, 5), c(1, 1:5)) expect_equal(ntile(1, 7), 1) expect_equal(ntile(1:2, 7), 1:2) expect_equal(ntile(1:3, 7), 1:3) expect_equal(ntile(1:4, 7), 1:4) expect_equal(ntile(1:5, 7), 1:5) expect_equal(ntile(1:6, 7), 1:6) expect_equal(ntile(1:7, 7), 1:7) expect_equal(ntile(1:8, 7), c(1, 1:7)) }) test_that("ntile ignores NAs", { x <- c(1:3, NA, NA, NA) expect_equal(ntile(x, 3), x) x1 <- c(1L, 1L, 1L, NA, NA, NA) expect_equal(ntile(x, 1), x1) }) test_that("ntile always returns an integer", { expect_equal(ntile(numeric(), 3), integer()) expect_equal(ntile(NA, 3), NA_integer_) }) test_that("ntile() does not overflow (#4186)", { out <- ntile(1:1e5, n = 1e5) expect_equal(out, 1:1e5) }) test_that("ntile() works with one argument (#3418)", { df <- tibble(id = c(1, 1, 2, 2, 2), x = 1:5) expect_equal(mutate(df, out = ntile(n = 3))$out, c(1, 1, 2, 2, 3)) gf <- group_by(df, id) expect_equal(mutate(gf, out = ntile(n = 2))$out, c(1, 2, 1, 1, 2)) }) test_that("ntile() validates `n`", { expect_snapshot(error = TRUE, { ntile(1, n = 1.5) }) expect_snapshot(error = TRUE, { ntile(1, n = c(1, 2)) }) expect_snapshot(error = TRUE, { ntile(1, n = NA_real_) }) expect_snapshot(error = TRUE, { ntile(1, n = 0) }) }) dplyr/tests/testthat/test-group-map.R0000644000176200001440000000645314366556340017441 0ustar liggesuserstest_that("group_map() respects empty groups", { res <- group_by(mtcars, cyl) %>% group_map(~ head(.x, 2L)) expect_equal(length(res), 3L) res <- iris %>% group_by(Species) %>% filter(Species == "setosa") %>% group_map(~ tally(.x)) expect_equal(length(res), 1L) res <- iris %>% group_by(Species, .drop = FALSE) %>% filter(Species == "setosa") %>% group_map(~ tally(.x)) 
expect_equal(length(res), 3L) }) test_that("group_map() can return arbitrary objects", { expect_equal( group_by(mtcars, cyl) %>% group_map(~ 10), rep(list(10), 3) ) }) test_that("group_map() works on ungrouped data frames (#4067)", { expect_identical( group_map(mtcars, ~ head(.x, 2L)), list(head(as_tibble(mtcars), 2L)) ) }) test_that("group_modify() makes a grouped_df", { res <- group_by(mtcars, cyl) %>% group_modify(~ head(.x, 2L)) expect_equal(nrow(res), 6L) expect_equal(group_rows(res), list_of(1:2, 3:4, 5:6)) res <- iris %>% group_by(Species) %>% filter(Species == "setosa") %>% group_modify(~ tally(.x)) expect_equal(nrow(res), 1L) expect_equal(group_rows(res), list_of(1L)) res <- iris %>% group_by(Species, .drop = FALSE) %>% filter(Species == "setosa") %>% group_modify(~ tally(.x)) expect_equal(nrow(res), 3L) expect_equal(as.list(group_rows(res)), list(1L, 2L, 3L)) }) test_that("group_modify() and group_map() want functions with at least 2 arguments, or ... (#3996)", { head1 <- function(d, ...) 
head(d, 1) g <- iris %>% group_by(Species) expect_equal(nrow(group_modify(g, head1)), 3L) expect_equal(length(group_map(g, head1)), 3L) }) test_that("group_modify() works on ungrouped data frames (#4067)", { expect_identical( group_modify(mtcars, ~ head(.x, 2L)), head(mtcars, 2L) ) }) test_that("group_map() uses ptype on empty splits (#4421)", { res <- mtcars %>% group_by(cyl) %>% filter(hp > 1000) %>% group_map(~.x) expect_equal(res, list(), ignore_attr = TRUE) ptype <- attr(res, "ptype") expect_equal(names(ptype), setdiff(names(mtcars), "cyl")) expect_equal(nrow(ptype), 0L) expect_s3_class(ptype, "data.frame") }) test_that("group_modify() uses ptype on empty splits (#4421)", { res <- mtcars %>% group_by(cyl) %>% filter(hp > 1000) %>% group_modify(~.x) expect_equal(res, group_by(mtcars[integer(0L), names(res)], cyl)) }) test_that("group_modify() works with additional arguments (#4509)", { myfun <- function(.x, .y, foo) { .x[[foo]] <- 1 .x } srcdata <- data.frame( A=rep(1:2, each = 3) ) %>% group_by(A) targetdata <- srcdata targetdata$bar <- 1 expect_equal( group_modify(.data = srcdata, .f = myfun, foo = "bar"), targetdata ) }) test_that("group_map() does not warn about .keep= for rowwise_df", { expect_warning( data.frame(x = 1) %>% rowwise() %>% group_walk(~ {}), NA ) }) test_that("group_map() give meaningful errors", { head1 <- function(d) head(d, 1) expect_snapshot({ # group_modify() (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~ data.frame(cyl = 19)))) (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~ 10))) (expect_error(iris %>% group_by(Species) %>% group_modify(head1))) # group_map() (expect_error(iris %>% group_by(Species) %>% group_map(head1))) }) }) dplyr/tests/testthat/test-colwise-group-by.R0000644000176200001440000000203414266276767020743 0ustar liggesuserstest_that("group_by_ verbs take scoped inputs", { expect_identical(group_vars(group_by_all(mtcars)), names(mtcars)) expect_identical(group_vars(group_by_at(mtcars, 
vars(starts_with("d")))), c("disp", "drat")) expect_identical(group_vars(group_by_if(iris, is.factor)), "Species") }) test_that("group_by_ verbs accept optional operations", { df <- tibble(x = 1:2, y = 2:3) gdf <- group_by(mutate_all(df, as.factor), x, y) expect_identical(group_by_all(df, as.factor), gdf) expect_identical(group_by_if(df, is_integer, as.factor), gdf) expect_identical(group_by_at(df, vars(x:y), as.factor), gdf) }) test_that("group_by variants can group by an already grouped by data (#3351)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(c(1, 2), each = 4), x = 1:8) %>% group_by(gr1) expect_identical( group_by_at(tbl, vars(gr1, gr2)), group_by(tbl, gr1, gr2) ) expect_identical( group_by_all(tbl), group_by(tbl, gr1, gr2, x) ) expect_identical( group_by_if(tbl, is.integer), group_by(tbl, gr1, x) ) }) dplyr/tests/testthat/test-rows.R0000644000176200001440000003407214366556340016522 0ustar liggesusers# ------------------------------------------------------------------------------ # rows_insert() test_that("rows_insert() works", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_insert(data, tibble(a = 4L, b = "z"), by = "a"), tibble(a = 1:4, b = c("a", "b", NA, "z"), c = c(0.5, 1.5, 2.5, NA)) ) }) test_that("rows_insert() doesn't allow insertion of matched keys by default", { x <- tibble(a = 1, b = 2) y <- tibble(a = 1, b = 3) expect_snapshot( (expect_error(rows_insert(x, y, by = "a"))) ) y <- tibble(a = c(1, 1, 1), b = c(3, 4, 5)) expect_snapshot( (expect_error(rows_insert(x, y, by = "a"))) ) }) test_that("rows_insert() allows you to ignore matched keys with `conflict = 'ignore'`", { x <- tibble(a = 1, b = 2) y <- tibble(a = 1, b = 3) expect_identical(rows_insert(x, y, by = "a", conflict = "ignore"), x) y <- tibble(a = c(1, 2, 1), b = c(3, 4, 5)) expect_identical( rows_insert(x, y, by = "a", conflict = "ignore"), rows_insert(x, y[2,], by = "a") ) }) test_that("rows_insert() allows `x` keys to be duplicated (#5553)", { 
x <- tibble(a = c(1, 1), b = c(2, 3)) y <- tibble(a = 2, b = 4) expect_identical( rows_insert(x, y, by = "a"), tibble(a = c(1, 1, 2), b = c(2, 3, 4)) ) }) test_that("rows_insert() allows `y` keys to be duplicated (#5553)", { x <- tibble(a = 2, b = 4) y <- tibble(a = c(1, 1), b = c(2, 3)) expect_identical( rows_insert(x, y, by = "a"), tibble(a = c(2, 1, 1), b = c(4, 2, 3)) ) }) test_that("rows_insert() casts keys to the type of `x`", { x <- vctrs::data_frame(key = 1L, value = 2) y <- vctrs::data_frame(key = 1.5, value = 1.5) expect_snapshot({ (expect_error(rows_insert(x, y, "key"))) }) }) test_that("rows_insert() casts values to the type of `x`", { x <- vctrs::data_frame(key = 1, value = 2L) y <- vctrs::data_frame(key = 2, value = 1.5) expect_snapshot({ (expect_error(rows_insert(x, y, "key"))) }) }) test_that("rows_insert() checks that `x` and `y` contain `by` (#6652)", { x <- tibble(a = 1, b = 2) y <- tibble(a = 1) expect_snapshot({ (expect_error(rows_insert(x, y, by = "c"))) }) expect_snapshot({ (expect_error(rows_insert(x, y, by = c("a", "b")))) }) }) test_that("`conflict` is validated", { x <- tibble(a = 1) y <- tibble(a = 2) expect_snapshot({ (expect_error(rows_insert(x, y, by = "a", conflict = "foo"))) (expect_error(rows_insert(x, y, by = "a", conflict = 1))) }) }) # ------------------------------------------------------------------------------ # rows_append() test_that("rows_append() allows you to insert unconditionally", { x <- tibble(a = 1, b = 2) y <- tibble(a = 1, b = 3) expect_identical(rows_append(x, y), bind_rows(x, y)) y <- tibble(a = c(1, 2, 1), b = c(3, 4, 5)) expect_identical(rows_append(x, y), bind_rows(x, y)) }) test_that("rows_append() casts to the type of `x`", { x <- vctrs::data_frame(key = 1L, value = 2) y <- vctrs::data_frame(key = 1.5, value = 1.5) expect_snapshot({ (expect_error(rows_append(x, y))) }) y <- vctrs::data_frame(key = 2, value = 3L) out <- rows_append(x, y) expect_identical(out$key, c(1L, 2L)) expect_identical(out$value, c(2, 
3)) }) test_that("rows_append() requires that `y` columns be a subset of `x`", { x <- tibble(a = 1, b = 2) y <- tibble(a = 1, b = 2, c = 3) expect_snapshot({ (expect_error(rows_append(x, y))) }) }) test_that("rows_append() doesn't require that `x` columns be a subset of `y`", { x <- tibble(a = 1, b = 2, c = 3) y <- tibble(a = 1, b = 2) out <- rows_append(x, y) expect_identical(out$c, c(3, NA)) }) # ------------------------------------------------------------------------------ test_that("rows_update() works", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_update(data, tibble(a = 2:3, b = "z"), by = "a"), tibble(a = 1:3, b = c("a", "z", "z"), c = data$c) ) expect_silent( expect_identical( rows_update(data, tibble(b = "z", a = 2:3), by = "a"), tibble(a = 1:3, b = c("a", "z", "z"), c = data$c) ) ) }) test_that("rows_update() requires `y` keys to exist in `x` by default", { x <- tibble(a = 1, b = 2) y <- tibble(a = c(2, 1, 3), b = c(1, 1, 1)) expect_snapshot((expect_error(rows_update(x, y, "a")))) }) test_that("rows_update() allows `y` keys that don't exist in `x` to be ignored", { x <- tibble(a = 1, b = 2) y <- tibble(a = c(2, 1, 3), b = c(1, 1, 1)) expect_identical( rows_update(x, y, "a", unmatched = "ignore"), tibble(a = 1, b = 1) ) }) test_that("rows_update() allows `x` keys to be duplicated (#5553)", { x <- tibble(a = c(1, 2, 1, 3), b = c(2, 3, 4, 5), c = letters[1:4]) y <- tibble(a = c(1, 3), b = c(99, 88)) expect_identical( rows_update(x, y, by = "a"), tibble(a = c(1, 2, 1, 3), b = c(99, 3, 99, 88), c = letters[1:4]) ) }) test_that("rows_update() doesn't allow `y` keys to be duplicated (#5553)", { x <- tibble(a = 2, b = 4) y <- tibble(a = c(1, 1), b = c(2, 3)) expect_snapshot((expect_error(rows_update(x, y, by = "a")))) }) test_that("rows_update() avoids bare data.frame `drop = FALSE` issues", { x <- vctrs::data_frame(x = c(1, 2, 3), y = I(list(1:2, 3:4, 5:6))) y <- vctrs::data_frame(x = c(1, 3), y = I(list(0L, 100:101))) 
out <- rows_update(x, y, "x") expect_identical(out$y, I(list(0L, 3:4, 100:101))) }) test_that("rows_update() casts keys to their common type for matching but retains `x` type", { x <- vctrs::data_frame(key = 1L, value = 2) y <- vctrs::data_frame(key = 1, value = 1.5) out <- rows_update(x, y, "key") expect_identical(out$key, x$key) expect_identical(out$value, y$value) y <- vctrs::data_frame(key = "x", value = 1.5) expect_snapshot({ (expect_error(rows_update(x, y, "key"))) }) }) test_that("rows_update() casts values to the type of `x`", { x <- vctrs::data_frame(key = 1, value = 2L) y <- vctrs::data_frame(key = 1, value = 1.5) expect_snapshot({ (expect_error(rows_update(x, y, "key"))) }) out <- rows_update(y, x, "key") expect_identical(out$value, 2) }) test_that("`unmatched` is validated", { x <- tibble(a = 1) y <- tibble(a = 1) expect_snapshot({ (expect_error(rows_update(x, y, by = "a", unmatched = "foo"))) (expect_error(rows_update(x, y, by = "a", unmatched = 1))) }) }) # ------------------------------------------------------------------------------ test_that("rows_patch() works", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_patch(data, tibble(a = 2:3, b = "z"), by = "a"), tibble(a = 1:3, b = c("a", "b", "z"), c = data$c) ) expect_silent( expect_identical( rows_patch(data, tibble(b = "z", a = 2:3), by = "a"), tibble(a = 1:3, b = c("a", "b", "z"), c = data$c) ) ) }) test_that("rows_patch() requires `y` keys to exist in `x` by default", { x <- tibble(a = 1, b = 2) y <- tibble(a = c(2, 1, 3), b = c(1, 1, 1)) expect_snapshot((expect_error(rows_patch(x, y, "a")))) }) test_that("rows_patch() allows `y` keys that don't exist in `x` to be ignored", { x <- tibble(a = 1, b = NA_real_) y <- tibble(a = c(2, 1, 3), b = c(1, 1, 1)) expect_identical( rows_patch(x, y, "a", unmatched = "ignore"), tibble(a = 1, b = 1) ) }) test_that("rows_patch() allows `x` keys to be duplicated (#5553)", { x <- tibble(a = c(1, 2, 1, 3), b = c(NA, 3, 4, 
NA), c = letters[1:4]) y <- tibble(a = c(1, 3), b = c(99, 88)) expect_identical( rows_patch(x, y, by = "a"), tibble(a = c(1, 2, 1, 3), b = c(99, 3, 4, 88), c = letters[1:4]) ) }) test_that("rows_patch() doesn't allow `y` keys to be duplicated (#5553)", { x <- tibble(a = 2, b = 4) y <- tibble(a = c(1, 1), b = c(2, 3)) expect_snapshot((expect_error(rows_patch(x, y, by = "a")))) }) test_that("rows_patch() avoids bare data.frame `drop = FALSE` issues", { x <- vctrs::data_frame(x = c(1, 2, 3, 3), y = c(NA, 5, NA, 6)) y <- vctrs::data_frame(x = c(1, 3), y = c(0, 100)) out <- rows_patch(x, y, "x") expect_identical(out$y, c(0, 5, 100, 6)) }) test_that("rows_patch() casts keys to their common type for matching but retains `x` type", { x <- vctrs::data_frame(key = 1L, value = NA_real_) y <- vctrs::data_frame(key = 1, value = 1.5) out <- rows_patch(x, y, "key") expect_identical(out$key, x$key) expect_identical(out$value, y$value) y <- vctrs::data_frame(key = "x", value = 1.5) expect_snapshot({ (expect_error(rows_patch(x, y, "key"))) }) }) test_that("rows_patch() casts values to the type of `x`", { x <- vctrs::data_frame(key = 1, value = 2L) y <- vctrs::data_frame(key = 1, value = 1.5) expect_snapshot({ (expect_error(rows_patch(x, y, "key"))) }) }) # ------------------------------------------------------------------------------ test_that("rows_upsert() works", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_upsert(data, tibble(a = 2:4, b = "z"), by = "a"), tibble(a = 1:4, b = c("a", "z", "z", "z"), c = c(data$c, NA)) ) }) test_that("rows_upsert() allows `x` keys to be duplicated (#5553)", { x <- tibble(a = c(1, 2, 1, 3), b = c(NA, 3, 4, NA), c = letters[1:4]) y <- tibble(a = c(1, 3, 4), b = c(99, 88, 100)) expect_identical( rows_upsert(x, y, by = "a"), tibble(a = c(1, 2, 1, 3, 4), b = c(99, 3, 99, 88, 100), c = c(letters[1:4], NA)) ) }) test_that("rows_upsert() doesn't allow `y` keys to be duplicated (#5553)", { x <- tibble(a = 2, b = 
4) y <- tibble(a = c(1, 1), b = c(2, 3)) expect_snapshot((expect_error(rows_upsert(x, y, by = "a")))) }) test_that("rows_upsert() avoids bare data.frame `drop = FALSE` issues", { x <- vctrs::data_frame(x = c(1, 2, 3), y = I(list(1:2, 3:4, 5:6))) y <- vctrs::data_frame(x = c(1, 3, 4), y = I(list(0L, 100:101, -1L))) out <- rows_upsert(x, y, "x") expect_identical(out$y, I(list(0L, 3:4, 100:101, -1L))) }) test_that("rows_upsert() casts keys to their common type for matching but retains `x` type", { x <- vctrs::data_frame(key = 1L, value = 2) y <- vctrs::data_frame(key = c(2, 1), value = c(1.5, 2.5)) out <- rows_upsert(x, y, "key") expect_identical(out$key, c(1L, 2L)) expect_identical(out$value, c(2.5, 1.5)) y <- vctrs::data_frame(key = "x", value = 1.5) expect_snapshot({ (expect_error(rows_upsert(x, y, "key"))) }) }) test_that("rows_upsert() casts keys to the type of `x`", { x <- vctrs::data_frame(key = 1L, value = 2) y <- vctrs::data_frame(key = 1.5, value = 1.5) expect_snapshot({ (expect_error(rows_upsert(x, y, "key"))) }) }) test_that("rows_upsert() casts values to the type of `x`", { x <- vctrs::data_frame(key = 1, value = 2L) y <- vctrs::data_frame(key = 1, value = 1.5) expect_snapshot({ (expect_error(rows_upsert(x, y, "key"))) }) }) # ------------------------------------------------------------------------------ test_that("rows_delete() works", { data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) expect_identical( rows_delete(data, tibble(a = 2:3), by = "a"), data[1, ] ) }) test_that("rows_delete() ignores extra `y` columns, with a message", { x <- tibble(a = 1) y <- tibble(a = 1, b = 2) expect_snapshot({ out <- rows_delete(x, y) }) expect_identical(out, x[0,]) expect_snapshot({ out <- rows_delete(x, y, by = "a") }) expect_identical(out, x[0,]) }) test_that("rows_delete() requires `y` keys to exist in `x` by default", { x <- tibble(a = 1, b = 2) y <- tibble(a = c(2, 1, 3), b = c(1, 1, 1)) expect_snapshot((expect_error(rows_delete(x, y, "a")))) }) 
test_that("rows_delete() allows `y` keys that don't exist in `x` to be ignored", { x <- tibble(a = 1, b = 2) y <- tibble(a = c(2, 1, 3)) expect_identical( rows_delete(x, y, "a", unmatched = "ignore"), tibble(a = double(), b = double()) ) }) test_that("rows_delete() allows `x` keys to be duplicated (#5553)", { x <- tibble(a = c(1, 2, 1, 3), b = c(NA, 3, 4, NA), c = letters[1:4]) y <- tibble(a = c(1, 3)) expect_identical( rows_delete(x, y, by = "a"), x[2,] ) }) test_that("rows_delete() allows `y` keys to be duplicated (#5553)", { x <- tibble(a = c(1, 2, 3), b = c(4, 5, 6)) y <- tibble(a = c(1, 1)) expect_identical( rows_delete(x, y, by = "a"), x[c(2, 3),] ) }) test_that("rows_delete() casts keys to their common type for matching but retains `x` type", { x <- vctrs::data_frame(key = c(1L, 2L), value = c("x", "y")) y <- vctrs::data_frame(key = 2) out <- rows_delete(x, y, "key") expect_identical(out$key, 1L) y <- vctrs::data_frame(key = "x", value = 1.5) expect_snapshot({ (expect_error(rows_delete(x, y, "key"))) }) }) # ------------------------------------------------------------------------------ # Common errors test_that("rows_check_x_contains_y() checks that `y` columns are in `x`", { x <- tibble(a = 1) y <- tibble(a = 1, b = 2) expect_snapshot((expect_error(rows_check_x_contains_y(x, y)))) }) test_that("rows_check_by() checks that `y` has at least 1 column before using it (#6061)", { y <- tibble() expect_snapshot((expect_error(rows_check_by(by = NULL, y = y)))) }) test_that("rows_check_by() uses the first column from `y` by default, with a message", { y <- tibble(a = 1, b = 2) expect_snapshot( by <- rows_check_by(by = NULL, y = y) ) expect_identical(by, "a") }) test_that("rows_check_by() validates `by`", { y <- tibble(x = 1) expect_snapshot({ (expect_error(rows_check_by(by = 1, y = y))) (expect_error(rows_check_by(by = character(), y = y))) (expect_error(rows_check_by(by = c(x = "y"), y = y))) }) }) test_that("rows_check_contains_by() checks that all `by` columns 
are in `x`", { x <- tibble(x = 1) expect_snapshot({ (expect_error(rows_check_contains_by(x, "y", arg = "x"))) (expect_error(rows_check_contains_by(x, c("y", "x", "z"), arg = "y"))) }) }) test_that("rows_check_unique() requires uniqueness", { x <- tibble(x = c(1, 1, 1), y = c(2, 3, 2), z = c(1, 2, 3)) expect_silent(rows_check_unique(x, "x")) expect_snapshot({ (expect_error(rows_check_unique(x["x"], "x"))) (expect_error(rows_check_unique(x[c("x", "y")], "y"))) }) }) dplyr/tests/testthat/test-colwise-select.R0000644000176200001440000001326214366556340020450 0ustar liggesusersdf <- tibble(x = 0L, y = 0.5, z = 1) test_that("can select/rename all variables", { expect_identical(select_all(df), df) expect_identical(select_all(df, toupper), set_names(df, c("X", "Y", "Z"))) expect_identical(select_all(df, toupper), rename_all(df, toupper)) }) test_that("can select/rename with predicate", { expect_identical(select_if(df, is_integerish), select(df, x, z)) expect_identical(select_if(df, is_integerish, toupper), set_names(df[c("x", "z")], c("X", "Z"))) expect_identical(rename_if(df, is_integerish, toupper), set_names(df, c("X", "y", "Z"))) }) test_that("can take list, but only containing single function", { expect_identical( select_if(df, list(~ is_integerish(.)), list(~ toupper(.))), set_names(df[c("x", "z")], c("X", "Z")) ) expect_identical( rename_if(df, list(~ is_integerish(.)), list(~ toupper(.))), set_names(df, c("X", "y", "Z")) ) }) test_that("can select/rename with vars()", { expect_identical(select_at(df, vars(x:y)), df[-3]) expect_identical(select_at(df, vars(x:y), toupper), set_names(df[-3], c("X", "Y"))) expect_identical(rename_at(df, vars(x:y), toupper), set_names(df, c("X", "Y", "z"))) }) test_that("select variants can use grouping variables (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( select(tbl, gr1), select_at(tbl, vars(gr1)) ) expect_identical( select_all(tbl), tbl ) expect_identical( 
select_if(tbl, is.integer), tbl ) }) test_that("select_if keeps grouping cols", { by_species <- iris %>% group_by(Species) expect_silent(df <- by_species %>% select_if(is.numeric)) expect_equal(df, by_species[c(5, 1:4)]) }) test_that("select_if() handles non-syntactic colnames", { df <- tibble(`x 1` = 1:3) expect_identical(select_if(df, is_integer)[[1]], 1:3) }) test_that("select_if() handles quoted predicates", { expected <- select_if(mtcars, is_integerish) expect_identical(select_if(mtcars, "is_integerish"), expected) expect_identical(select_if(mtcars, ~ is_integerish(.x)), expected) }) test_that("rename_all() works with grouped data (#3363)", { df <- data.frame(a = 1, b = 2) out <- df %>% group_by(a) %>% rename_all(toupper) expect_identical(out, group_by(data.frame(A = 1, B = 2), A)) }) test_that("scoping (#3426)", { interface <- function(.tbl, .funs = list()) { impl(.tbl, .funs = .funs) } impl <- function(.tbl, ...) { select_all(.tbl, ...) } expect_identical( interface(mtcars, .funs = toupper), select_all(mtcars, .funs = list(toupper)) ) }) test_that("rename variants can rename a grouping variable (#3351)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) res <- rename(tbl, GR1 = gr1, GR2 = gr2, X = x) expect_identical( rename_at(tbl, vars(everything()), toupper), res ) expect_identical( rename_all(tbl, toupper), res ) expect_identical( rename_if(tbl, is.integer, toupper), res ) }) test_that("select_all does not change the order of columns (#3351)", { tbl <- group_by(tibble(x = 1:4, y = 4:1), y) expect_identical(select_all(tbl), tbl) tbl <- group_by(tibble(x = 1:4, y = 4:1), x) expect_identical(select_all(tbl), tbl) tbl <- group_by(tibble(x = 1:4, y = 4:1, z = 1:4), y) expect_identical(select_all(tbl), tbl) }) test_that("mutate_all does not change the order of columns (#3351)", { tbl <- group_by(tibble(x = 1:4, y = 1:4), y) expect_message(expect_identical(names(mutate_all(tbl, identity)), names(tbl)), "ignored") tbl <- 
group_by(tibble(x = 1:4, y = 1:4), x) expect_message(expect_identical(names(mutate_all(tbl, identity)), names(tbl)), "ignored") tbl <- group_by(tibble(x = 1:4, y = 1:4, z = 1:4), y) expect_message(expect_identical(names(mutate_all(tbl, identity)), names(tbl)), "ignored") }) test_that("select_if() and rename_if() handles logical (#4213)", { ids <- "Sepal.Length" expect_identical( iris %>% select_if(!names(.) %in% ids), iris %>% select(-Sepal.Length) ) expect_identical( iris %>% rename_if(!names(.) %in% ids, toupper), iris %>% rename_at(setdiff(names(.), "Sepal.Length"), toupper) ) }) test_that("rename_at() handles empty selection (#4324)", { expect_identical( mtcars %>% rename_at(vars(contains("fake_col")),~paste0("NewCol.",.)), mtcars ) }) test_that("rename_all/at() call the function with simple character vector (#4459)", { fun <- function(x) case_when(x == 'mpg' ~ 'fuel_efficiency', .default = x) out <- rename_all(mtcars,fun) expect_equal(names(out)[1L], 'fuel_efficiency') out <- rename_at(mtcars, vars(everything()), fun) expect_equal(names(out)[1L], 'fuel_efficiency') }) test_that("select_if() discards the column when predicate gives NA (#4486)", { out <- tibble(mycol=c("","",NA)) %>% select_if(~!all(.=="")) expect_identical( out, tibble::new_tibble(list(), nrow = 3L) ) }) # Errors ------------------------------------------------------------------ test_that("colwise select() / rename() give meaningful errors", { expect_snapshot({ df <- tibble(x = 0L, y = 0.5, z = 1) # colwise rename() (expect_error( df %>% rename_all() )) (expect_error( df %>% rename_if(is_integerish) )) (expect_error( df %>% rename_at(vars(x:y)) )) (expect_error( df %>% rename_all(list(tolower, toupper)) )) # colwise select() (expect_error( df %>% select_all(list(tolower, toupper)) )) (expect_error( df %>% select_if(function(.x) 1) )) (expect_error( df %>% select_if(function(.x) c(TRUE, TRUE)) )) (expect_error( data.frame() %>% select_all(.funs = 42) )) }) }) 
dplyr/tests/testthat/test-colwise.R0000644000176200001440000000204314266276767017201 0ustar liggesuserstest_that("tbl_at_vars() treats `NULL` as empty inputs", { expect_identical(tbl_at_vars(mtcars, vars(NULL)), tbl_at_vars(mtcars, vars())) expect_identical( tibble::remove_rownames(mutate_at(mtcars, vars(NULL), `*`, 100)), tibble::remove_rownames(mtcars) ) }) test_that("lists of formulas are auto-named", { df <- tibble(x = 1:3, y = 4:6) out <- df %>% summarise_all(list(~ mean(.), ~sd(.x, na.rm = TRUE))) expect_named(out, c("x_mean", "y_mean", "x_sd", "y_sd")) out <- df %>% summarise_all(list(foobar = ~ mean(.), ~sd(.x, na.rm = TRUE))) expect_named(out, c("x_foobar", "y_foobar", "x_sd", "y_sd")) }) # Errors -------------------------------------------- test_that("colwise utils gives meaningful error messages", { expect_snapshot({ (expect_error( tbl_at_vars(iris, raw(3)) )) (expect_error( tbl_if_vars(iris, list(identity, force), environment()) )) .funs <- as_fun_list(list(identity, force), caller_env()) (expect_error( tbl_if_vars(iris, .funs, environment()) )) }) }) dplyr/tests/testthat/test-tbl.R0000644000176200001440000000062214266276767016316 0ustar liggesuserstest_that("tbl_nongroup_vars() excludes group variables", { gdf <- group_by(mtcars, cyl) expect_identical(tbl_nongroup_vars(gdf), setdiff(tbl_vars(gdf), "cyl")) }) test_that("tbl_vars() records groups", { gdf <- group_by(mtcars, cyl, am) expect_s3_class(tbl_vars(gdf), "dplyr_sel_vars") expect_true(is_sel_vars(tbl_vars(gdf))) expect_identical(tbl_vars(gdf) %@% groups, c("cyl", "am")) }) dplyr/tests/testthat/test-join-by.R0000644000176200001440000003060114525503021017052 0ustar liggesusers# ------------------------------------------------------------------------------ # `join_by()` test_that("works with equi conditions", { by <- join_by(x == y, a == b) expect_identical(by$x, c("x", "a")) expect_identical(by$y, c("y", "b")) expect_identical(by$condition, c("==", "==")) expect_identical(by$filter, c("none", 
"none")) }) test_that("works with non-equi conditions", { by <- join_by(x == y, a > b, a >= b, a < b, a <= b) expect_identical(by$x, c("x", rep("a", 4))) expect_identical(by$y, c("y", rep("b", 4))) expect_identical(by$condition, c("==", ">", ">=", "<", "<=")) }) test_that("works with `closest()`", { by <- join_by(x == y, closest(a >= b)) expect_identical(by$x, c("x", "a")) expect_identical(by$y, c("y", "b")) expect_identical(by$filter, c("none", "max")) expect_identical(by$condition, c("==", ">=")) by <- join_by(x == y, closest(a > b)) expect_identical(by$x, c("x", "a")) expect_identical(by$y, c("y", "b")) expect_identical(by$filter, c("none", "max")) expect_identical(by$condition, c("==", ">")) by <- join_by(x == y, closest(a <= b)) expect_identical(by$x, c("x", "a")) expect_identical(by$y, c("y", "b")) expect_identical(by$filter, c("none", "min")) expect_identical(by$condition, c("==", "<=")) by <- join_by(x == y, closest(a < b)) expect_identical(by$x, c("x", "a")) expect_identical(by$y, c("y", "b")) expect_identical(by$filter, c("none", "min")) expect_identical(by$condition, c("==", "<")) }) test_that("works with single arguments", { by <- join_by(a, b) expect_identical(by$x, c("a", "b")) expect_identical(by$y, c("a", "b")) }) test_that("works with character strings", { by1 <- join_by("a", "b" == "c", closest("d" >= "e")) by2 <- join_by(a, b == c, closest(d >= e)) expect_identical(by1$condition, by2$condition) expect_identical(by1$filter, by2$filter) expect_identical(by1$x, by2$x) expect_identical(by1$y, by2$y) }) test_that("works with explicit referencing", { by <- join_by(x$a == y$b) expect_identical(by$x, "a") expect_identical(by$y, "b") by <- join_by(y$a == x$b) expect_identical(by$x, "b") expect_identical(by$y, "a") }) test_that("join condition is correctly reversed with explicit referencing", { by <- join_by(y$a == x$a, y$a >= x$a, y$a > x$a, y$a <= x$a, y$a < x$a) expect_identical(by$condition, c("==", "<=", "<", ">=", ">")) }) test_that("`closest()` 
works with explicit referencing", { by <- join_by(closest(y$a <= x$b), closest(y$a > x$b)) expect_identical(by$x, c("b", "b")) expect_identical(by$y, c("a", "a")) expect_identical(by$filter, c("max", "min")) expect_identical(by$condition, c(">=", "<")) }) test_that("between conditions expand correctly", { by <- join_by(between(a, b, c)) expect_identical(by$x, c("a", "a")) expect_identical(by$y, c("b", "c")) by <- join_by(between(y$a, x$b, x$c)) expect_identical(by$x, c("b", "c")) expect_identical(by$y, c("a", "a")) by <- join_by(between(a, b, c, bounds = "[]")) expect_identical(by$condition, c(">=", "<=")) by <- join_by(between(a, b, c, bounds = "[)")) expect_identical(by$condition, c(">=", "<")) by <- join_by(between(a, b, c, bounds = "(]")) expect_identical(by$condition, c(">", "<=")) by <- join_by(between(a, b, c, bounds = "()")) expect_identical(by$condition, c(">", "<")) by <- join_by(between(y$a, x$b, x$c, bounds = "[]")) expect_identical(by$condition, c("<=", ">=")) by <- join_by(between(y$a, x$b, x$c, bounds = "[)")) expect_identical(by$condition, c("<=", ">")) by <- join_by(between(y$a, x$b, x$c, bounds = "(]")) expect_identical(by$condition, c("<", ">=")) by <- join_by(between(y$a, x$b, x$c, bounds = "()")) expect_identical(by$condition, c("<", ">")) }) test_that("within conditions expand correctly", { by <- join_by(within(a, b, c, d)) expect_identical(by$x, c("a", "b")) expect_identical(by$y, c("c", "d")) expect_identical(by$condition, c(">=", "<=")) by <- join_by(within(y$a, y$b, x$b, x$c)) expect_identical(by$x, c("b", "c")) expect_identical(by$y, c("a", "b")) expect_identical(by$condition, c("<=", ">=")) }) test_that("overlaps conditions expand correctly", { by <- join_by(overlaps(a, b, c, d)) expect_identical(by$x, c("a", "b")) expect_identical(by$y, c("d", "c")) by <- join_by(overlaps(y$a, y$b, x$b, x$c)) expect_identical(by$x, c("c", "b")) expect_identical(by$y, c("a", "b")) by <- join_by(overlaps(a, b, c, d, bounds = "[]")) 
expect_identical(by$condition, c("<=", ">=")) by <- join_by(overlaps(a, b, c, d, bounds = "[)")) expect_identical(by$condition, c("<", ">")) by <- join_by(overlaps(a, b, c, d, bounds = "(]")) expect_identical(by$condition, c("<", ">")) by <- join_by(overlaps(a, b, c, d, bounds = "()")) expect_identical(by$condition, c("<", ">")) by <- join_by(overlaps(y$a, y$b, x$b, x$c, bounds = "[]")) expect_identical(by$condition, c(">=", "<=")) by <- join_by(overlaps(y$a, y$b, x$b, x$c, bounds = "[)")) expect_identical(by$condition, c(">", "<")) by <- join_by(overlaps(y$a, y$b, x$b, x$c, bounds = "(]")) expect_identical(by$condition, c(">", "<")) by <- join_by(overlaps(y$a, y$b, x$b, x$c, bounds = "()")) expect_identical(by$condition, c(">", "<")) }) test_that("between / overlaps / within / closest can use named arguments", { by <- join_by(between(a, y_upper = b, y_lower = c)) expect_identical(by$x, c("a", "a")) expect_identical(by$y, c("c", "b")) by <- join_by(overlaps(y_lower = c, y_upper = d, x_lower = a, x_upper = b)) expect_identical(by$x, c("a", "b")) expect_identical(by$y, c("d", "c")) expect_identical(by$condition, c("<=", ">=")) by <- join_by(overlaps(y_lower = x$c, y_upper = x$d, x_lower = y$a, x_upper = y$b)) expect_identical(by$x, c("d", "c")) expect_identical(by$y, c("a", "b")) expect_identical(by$condition, c(">=", "<=")) by <- join_by(within(y_lower = c, y_upper = d, x_lower = a, x_upper = b)) expect_identical(by$x, c("a", "b")) expect_identical(by$y, c("c", "d")) expect_identical(by$condition, c(">=", "<=")) by <- join_by(within(y_lower = x$c, y_upper = x$d, x_lower = y$a, x_upper = y$b)) expect_identical(by$x, c("c", "d")) expect_identical(by$y, c("a", "b")) expect_identical(by$condition, c("<=", ">=")) by <- join_by(closest(expr = a > b)) expect_identical(by$x, "a") expect_identical(by$y, "b") }) test_that("joining by nothing is an error", { expect_snapshot(error = TRUE, { join_by() }) }) test_that("can pass `...` on to wrapped `join_by()`", { fn <- 
function(...) { join_by(...) } fn2 <- function(x) { fn({{x}} == y) } expect_identical(fn(x == y, a <= b), join_by(x == y, a <= b)) expect_identical(fn2(a), join_by(a == y)) }) test_that("can wrap `join_by()` and use embracing to inject columns (#6469)", { fn <- function(x) { join_by({{x}} == y) } expect_identical(fn("foo"), join_by("foo" == y)) # Expression substitution, not quosure evaluation a <- "foo" expect_identical(fn(a), join_by(a == y)) # But you can inline with `!!` expect_identical(fn(!!a), join_by("foo" == y)) fn <- function(x, top) { join_by(between({{x}}, lower, {{top}})) } expect_identical(fn(x, y), join_by(between(x, lower, y))) }) test_that("can wrap `join_by()` and use embracing to inject expressions", { fn <- function(expr) { join_by({{expr}}, a <= b) } expect_identical(fn(a == b), join_by(a == b, a <= b)) }) test_that("nicely catches required missing arguments when wrapped", { fn <- function(x, y) { join_by({{x}} == {{y}}) } expect_snapshot(error = TRUE, fn(a)) }) test_that("allows for namespaced helpers (#6838)", { # Captures namespaced expression for printing expect_snapshot(join_by(dplyr::between(x, left, right))) expect_snapshot(join_by(dplyr::within(xl, xu, yl, yu))) expect_snapshot(join_by(dplyr::overlaps(xl, xu, yl, yu))) expect_snapshot(join_by(dplyr::closest(x < y))) # Underlying values are otherwise the same as non-namespaced version by <- join_by(dplyr::between(x, left, right)) reference <- join_by(between(x, left, right)) expect_identical(by$condition, reference$condition) expect_identical(by$filter, reference$filter) expect_identical(by$x, reference$x) expect_identical(by$y, reference$y) }) test_that("has an informative print method", { expect_snapshot(join_by(a, b)) expect_snapshot(join_by("a", "b")) expect_snapshot(join_by(a == a, b >= c)) expect_snapshot(join_by(a == a, b >= "c")) expect_snapshot(join_by(a == a, closest(b >= c), closest(d < e))) }) test_that("has informative error messages", { # `=` rather than `==` 
expect_snapshot(error = TRUE, join_by(a = b)) # Empty expression expect_snapshot(error = TRUE, join_by(NULL)) # Improper helper specification expect_snapshot(error = TRUE, join_by(foo(x > y))) # Improper separator expect_snapshot(error = TRUE, join_by(x == y, x ^ y)) # Improper LHS expect_snapshot(error = TRUE, join_by(x + 1 == y)) # Improper RHS expect_snapshot(error = TRUE, join_by(x == y + 1)) # Garbage input expect_snapshot(error = TRUE, join_by(1)) # Call with non-symbol first element expect_snapshot(error = TRUE, join_by(1())) # Namespace prefixed helper with non-dplyr namespace # (typo or re-export, which currently isn't allowed) expect_snapshot(error = TRUE, join_by(dplyrr::between(x, left, right))) # Top level usage of `$` expect_snapshot(error = TRUE, join_by(x$a)) # `$` must only contain x/y on LHS expect_snapshot(error = TRUE, join_by(z$a == y$b)) expect_snapshot(error = TRUE, join_by(x$a == z$b)) # Extra cautious check for horrible usage of `$` expect_snapshot(error = TRUE, join_by(`$`(x+1, y) == b)) # Referencing the same table expect_snapshot(error = TRUE, join_by(x$a == x$b)) expect_snapshot(error = TRUE, join_by(y$a == b)) expect_snapshot(error = TRUE, join_by(between(x$a, x$a, x$b))) expect_snapshot(error = TRUE, join_by(within(x$a, x$b, x$a, x$b))) expect_snapshot(error = TRUE, join_by(overlaps(a, b, x$a, x$b))) expect_snapshot(error = TRUE, join_by(closest(x$a >= x$b))) # Referencing different tables in lower/upper bound pairs expect_snapshot(error = TRUE, join_by(between(a, x$a, y$b))) expect_snapshot(error = TRUE, join_by(within(x$a, y$b, y$a, y$b))) expect_snapshot(error = TRUE, join_by(overlaps(x$a, x$b, y$a, x$b))) # Too few arguments expect_snapshot(error = TRUE, join_by(`>`(x))) expect_snapshot(error = TRUE, join_by(between(x))) expect_snapshot(error = TRUE, join_by(within(x))) expect_snapshot(error = TRUE, join_by(overlaps(x))) expect_snapshot(error = TRUE, join_by(closest())) expect_snapshot(error = TRUE, join_by(`$`(x) > y)) # Too many 
arguments expect_snapshot(error = TRUE, join_by(closest(a >= b, 1))) # `==` in `closest()` expect_snapshot(error = TRUE, join_by(closest(a == b))) # Non-expression in `closest()` expect_snapshot(error = TRUE, join_by(closest(x))) expect_snapshot(error = TRUE, join_by(closest(1))) # Invalid expression in `closest()` expect_snapshot(error = TRUE, join_by(closest(x + y))) # Invalid `bounds` in `between()` and `overlaps()` expect_snapshot(error = TRUE, join_by(between(x, lower, upper, bounds = 1))) expect_snapshot(error = TRUE, join_by(between(x, lower, upper, bounds = "a"))) expect_snapshot(error = TRUE, join_by(overlaps(x, y, lower, upper, bounds = 1))) expect_snapshot(error = TRUE, join_by(overlaps(x, y, lower, upper, bounds = "a"))) # Non-empty dots in `between()` and `overlaps()` expect_snapshot(error = TRUE, join_by(between(x, lower, upper, foo = 1))) expect_snapshot(error = TRUE, join_by(overlaps(x, y, lower, upper, foo = 1))) }) # ------------------------------------------------------------------------------ # `as_join_by()` test_that("as_join_by() emits useful errors", { expect_snapshot(error = TRUE, as_join_by(FALSE)) }) # ------------------------------------------------------------------------------ # `join_by_common()` test_that("automatically finds common variables", { x_names <- c("x", "y") y_names <- c("x", "z") expect_message(by <- join_by_common(x_names, y_names)) expect_identical(by$x, "x") expect_identical(by$y, "x") }) test_that("join_by_common() emits useful information", { # Common by message expect_snapshot(by <- join_by_common(c("x", "y"), c("x", "y"))) # Works with names that need backticks expect_snapshot(by <- join_by_common(c("_x", "foo bar"), c("_x", "foo bar"))) # No common variables error expect_snapshot(error = TRUE, join_by_common(c("x", "y"), c("w", "z"))) }) dplyr/tests/testthat/test-vec-case-match.R0000644000176200001440000001445414366556340020312 0ustar liggesuserstest_that("works like a vectorized switch", { out <- vec_case_match( 
needles = c(1, 4, 2, 1), haystacks = list(1, 2, 4), values = list("a", "b", "d") ) expect_identical(out, c("a", "d", "b", "a")) }) test_that("the first match in `haystacks` is always used", { out <- vec_case_match( needles = c(1, 4, 2, 1), haystacks = list(1, 2, 1, 4, 2), values = list("a", "b", "c", "d", "e") ) expect_identical(out, c("a", "d", "b", "a")) }) test_that("`haystacks` can contain multiple values", { out <- vec_case_match( needles = c(1, 4, 2, 1), haystacks = list(c(1, 2), c(4, 5)), values = list("a", "b") ) expect_identical(out, c("a", "b", "a", "a")) }) test_that("`values` can be vectorized on the size of `needles`", { out <- vec_case_match( needles = c(1, 4, 2, 1), haystacks = list(c(1, 2), c(4, 5)), values = list(1:4, 5:8) ) expect_identical(out, c(1L, 6L, 3L, 4L)) }) test_that("unmatched value falls through to `default`", { out <- vec_case_match( needles = c(1, 4, 2, 1, 5), haystacks = list(1, 2), values = list("a", "b") ) expect_identical(out, c("a", NA, "b", "a", NA)) out <- vec_case_match( needles = c(1, 4, 2, 1, 5), haystacks = list(1, 2), values = list("a", "b"), default = "na" ) expect_identical(out, c("a", "na", "b", "a", "na")) }) test_that("`default` can be vectorized on the size of `needles`", { out <- vec_case_match( needles = c(1, 4, 2, 1, 5), haystacks = list(1, 2), values = list("a", "b"), default = c("one", "two", "three", "four", "five") ) expect_identical(out, c("a", "two", "b", "a", "five")) }) test_that("unmatched missing values get `default`", { out <- vec_case_match( needles = c(1, 4, 2, NA, NA), haystacks = list(1, 2), values = list("a", "b") ) expect_identical(out, c("a", NA, "b", NA, NA)) out <- vec_case_match( needles = c(1, 4, 2, NA, NA), haystacks = list(1, 2), values = list("a", "b"), default = "na" ) expect_identical(out, c("a", "na", "b", "na", "na")) }) test_that("can exactly match on missing values", { out <- vec_case_match( needles = c(NA, NaN, NA), haystacks = list(NA, NaN), values = list("na", "nan") ) 
expect_identical(out, c("na", "nan", "na")) }) test_that("`haystacks` must be castable to `needles`", { expect_snapshot(error = TRUE, { vec_case_match(1L, haystacks = list(1.5), values = list(2)) }) }) test_that("`ptype` overrides `values` common type", { expect_identical( vec_case_match(1:2, haystacks = list(1), values = list(0), ptype = integer()), c(0L, NA) ) expect_snapshot(error = TRUE, { vec_case_match(1:2, haystacks = list(1), values = list(1.5), ptype = integer()) }) }) test_that("`default` is considered in the common type computation", { expect_identical( vec_case_match(1, haystacks = list(1), values = list(2L), default = 1.5), 2 ) }) test_that("`default` respects `ptype`", { expect_identical( vec_case_match( needles = 1, haystacks = list(1), values = list(2L), default = 1, ptype = integer() ), 2L ) expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = list(1), values = list(2L), default = 1.5, ptype = integer() ) }) }) test_that("`NULL` values in `haystacks` and `values` are not dropped", { expect_snapshot(error = TRUE, { vec_case_match(1:2, list(1, NULL, 2), list("a", NULL, "b")) }) expect_snapshot(error = TRUE, { vec_case_match(1:2, list(1, NULL, 2), list("a", "a", "b")) }) expect_snapshot(error = TRUE, { vec_case_match(1:2, list(1, 1, 2), list("a", NULL, "b")) }) }) test_that("size of `needles` is maintained", { expect_snapshot(error = TRUE, { vec_case_match(1, haystacks = list(1), values = list(1:2)) }) }) test_that("requires at least one condition", { expect_snapshot(error = TRUE, { vec_case_match(1, haystacks = list(), values = list()) }) }) test_that("input must be a vector", { expect_snapshot(error = TRUE, { vec_case_match(environment(), haystacks = list(environment()), values = list(1)) }) }) test_that("`haystacks` must be a list", { expect_snapshot(error = TRUE, { vec_case_match(1, haystacks = 1, values = list(2)) }) }) test_that("`values` must be a list", { expect_snapshot(error = TRUE, { vec_case_match(1, haystacks = 
list(1), values = 2) }) }) test_that("`needles_arg` is respected", { expect_snapshot(error = TRUE, { vec_case_match( needles = environment(), haystacks = list(environment()), values = list(1), needles_arg = "foo" ) }) expect_snapshot(error = TRUE, { vec_case_match( needles = environment(), haystacks = list(environment()), values = list(1), needles_arg = "" ) }) }) test_that("`haystacks_arg` is respected", { expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = 1, values = list(1), haystacks_arg = "foo" ) }) expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = 1, values = list(1), haystacks_arg = "" ) }) expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = list(a = "x"), values = list(1), haystacks_arg = "foo" ) }) expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = list("x"), values = list(1), haystacks_arg = "" ) }) }) test_that("`values_arg` is respected", { expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = list(1, 2), values = list("x", b = 1), values_arg = "foo" ) }) expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = list(1, 2), values = list("x", b = 1), values_arg = "" ) }) }) test_that("`default_arg` is respected", { expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = list(1), values = list(2L), default = 1.5, default_arg = "foo", ptype = integer() ) }) expect_snapshot(error = TRUE, { vec_case_match( needles = 1, haystacks = list(1), values = list(2L), default = 1.5, default_arg = "", ptype = integer() ) }) }) dplyr/tests/testthat/test-deprec-combine.R0000644000176200001440000001650314525507051020373 0ustar liggesuserstest_that("combine() is deprecated", { expect_snapshot(combine()) }) test_that("combine handles NULL (#1596, #3365)", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal(combine(list(NULL, 1, 2)), c(1, 2)) expect_equal(combine(list(1, NULL, 2)), c(1, 2)) expect_equal(combine(list(1, 2, 
NULL)), c(1, 2)) expect_equal(combine(), logical()) expect_equal(combine(list(NULL)), logical()) expect_equal(combine(list(NULL, NULL), list(NULL)), list(NULL, NULL, NULL)) expect_equal(combine(NULL, list(NULL, NULL)), list(NULL, NULL)) }) test_that("combine works with input that used to fail (#1780)", { withr::local_options(lifecycle_verbosity = "quiet") no <- list(alpha = letters[1:3], omega = letters[24:26]) expect_equal(combine(no), unlist(no, use.names = FALSE)) }) test_that("combine works with NA and logical (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first expected_result <- c(NA, TRUE, FALSE, NA, TRUE) works1 <- combine(list(NA, TRUE, FALSE, NA, TRUE)) expect_equal(works1, expected_result) # NA length == 1 expected_result <- c(TRUE, FALSE, NA, TRUE) works1 <- combine(list(TRUE, FALSE, NA, TRUE)) expect_equal(works1, expected_result) # NA length > 1 expected_result <- c(TRUE, FALSE, NA, NA, TRUE) works3 <- combine(list(TRUE, FALSE, c(NA, NA), TRUE)) expect_equal(works3, expected_result) }) test_that("combine works with NA and integers (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") works <- combine(list(1L, 2L, NA, 4L)) expect_equal(works, c(1L, 2L, NA, 4L)) works <- combine(list(1L, 2L, c(NA, NA), 4L)) expect_equal(works, c(1L, 2L, NA, NA, 4L)) }) test_that("combine works with NA and factors (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first fac <- factor(c("a", "c", NA, "b"), levels = letters[1:3]) expected_result <- fac[c(3, 1, 3, 2)] works1 <- combine(list(NA, fac[1], NA, fac[2])) expect_equal(works1, expected_result) # NA length == 1 expected_result <- fac works1 <- combine(list(fac[1], fac[2], fac[3], fac[4])) expect_equal(works1, expected_result) works2 <- combine(list(fac[1], fac[2], NA, fac[4])) expect_equal(works2, expected_result) # NA length > 1 expected_result <- fac[c(1, 2, 3, 3, 4)] works3 <- combine(list(fac[1], fac[2], fac[c(3, 3)], fac[4])) expect_equal(works3, 
expected_result) works4 <- combine(list(fac[1], fac[2], c(NA, NA), fac[4])) expect_equal(works4, expected_result) }) test_that("combine works with NA and double (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first works <- combine(list(NA, 1.5, 2.5, NA, 4.5)) expect_equal(works, c(NA, 1.5, 2.5, NA, 4.5)) # NA length 1 works <- combine(list(1.5, 2.5, NA, 4.5)) expect_equal(works, c(1.5, 2.5, NA, 4.5)) # NA length > 1 works <- combine(list(1.5, 2.5, c(NA, NA), 4.5)) expect_equal(works, c(1.5, 2.5, NA, NA, 4.5)) }) test_that("combine works with NA and characters (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first works <- combine(list(NA, "a", "b", "c", NA, "e")) expect_equal(works, c(NA, "a", "b", "c", NA, "e")) # NA length 1 works <- combine(list("a", "b", "c", NA, "e")) expect_equal(works, c("a", "b", "c", NA, "e")) # NA length > 1 works <- combine(list("a", "b", "c", c(NA, NA), "e")) expect_equal(works, c("a", "b", "c", NA, NA, "e")) }) test_that("combine works with NA and POSIXct (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first works <- combine(list( NA, as.POSIXct("2010-01-01"), as.POSIXct("2010-01-02"), NA, as.POSIXct("2010-01-04") )) expect_equal( works, c(as.POSIXct(c(NA, "2010-01-01", "2010-01-02", NA, "2010-01-04"))), ignore_attr = TRUE ) # NA length 1 works <- combine(list( as.POSIXct("2010-01-01"), as.POSIXct("2010-01-02"), NA, as.POSIXct("2010-01-04") )) expect_equal( works, c(as.POSIXct(c("2010-01-01", "2010-01-02", NA, "2010-01-04"))), ignore_attr = TRUE ) # NA length > 1 works <- combine(list( as.POSIXct("2010-01-01"), as.POSIXct("2010-01-02"), c(NA, NA), as.POSIXct("2010-01-04") )) expect_equal( works, c(as.POSIXct(c("2010-01-01", "2010-01-02", NA, NA, "2010-01-04"))), ignore_attr = TRUE ) }) test_that("combine works with NA and Date (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first expected_result <- as.Date("2010-01-01") + c(NA, 1, 2, NA, 4) 
expect_equal(combine(as.list(expected_result)), expected_result) # NA length == 1 expected_result <- c(as.Date(c("2010-01-01", "2010-01-02", NA, "2010-01-04"))) works1 <- combine(list( as.Date("2010-01-01"), as.Date("2010-01-02"), as.Date(NA), as.Date("2010-01-04") )) expect_equal(works1, expected_result) works2 <- combine(list( as.Date("2010-01-01"), as.Date("2010-01-02"), NA, as.Date("2010-01-04") )) expect_equal(works2, expected_result) # NA length > 1 expected_result <- as.Date("2010-01-01") + c(0, 1, NA, NA, 3) works1 <- combine(split(expected_result, c(1, 2, 3, 3, 4))) expect_equal(works1, expected_result) works2 <- combine(list( as.Date("2010-01-01"), as.Date("2010-01-02"), c(NA, NA), as.Date("2010-01-04") )) expect_equal(works2, expected_result) }) test_that("combine works with NA and complex (#2203)", { withr::local_options(lifecycle_verbosity = "quiet") # NA first expected_result <- c(NA_complex_, 1 + 2i) works1 <- combine(list(NA_complex_, 1 + 2i)) expect_equal(works1, expected_result) # NA length == 1 expected_result <- c(1 + 1i, 2 + 1i, NA_complex_, 4 + 1i) expect_equal(combine(as.list(expected_result)), expected_result) works2 <- combine(list(1 + 1i, 2 + 1i, NA_complex_, 4 + 1i)) expect_equal(works2, expected_result) # NA length > 1 expected_result <- c(1 + 1i, 2 + 1i, NA_complex_, NA_complex_, 4 + 1i) expect_equal( combine(split(expected_result, c(1, 2, 3, 3, 4))), expected_result ) works3 <- combine(list(1 + 1i, 2 + 1i, c(NA_complex_, NA_complex_), 4 + 1i)) expect_equal(works3, expected_result) }) test_that("combine works with difftime", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( combine(as.difftime(1, units = "mins"), as.difftime(1, units = "hours")), as.difftime(c(60, 3600), units = "secs") ) expect_equal( combine(as.difftime(1, units = "secs"), as.difftime(1, units = "secs")), as.difftime(c(1, 1), units = "secs") ) expect_equal( combine(as.difftime(1, units = "days"), as.difftime(1, units = "secs")), as.difftime(c(24 * 60 
* 60, 1), units = "secs") ) expect_equal( combine(as.difftime(2, units = "weeks"), as.difftime(1, units = "secs")), as.difftime(c(2 * 7 * 24 * 60 * 60, 1), units = "secs") ) expect_equal( combine(as.difftime(2, units = "weeks"), as.difftime(3, units = "weeks")), as.difftime(c(2, 3), units = "weeks") ) }) test_that("combine uses tidy dots (#3407)", { withr::local_options(lifecycle_verbosity = "quiet") chunks <- list(1,2,3) expect_equal(combine(!!!chunks), c(1,2,3)) }) # Errors ------------------------------------------------------------------ test_that("combine() gives meaningful error messages", { withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(combine("a", 1))) (expect_error(combine(factor("a"), 1L))) }) }) dplyr/tests/testthat/helper-lazy.R0000644000176200001440000000116014525503021016760 0ustar liggesusersskip_if_no_lazy_character <- function() { skip_if(getRversion() <= "3.5.0") new_lazy_character <- import_vctrs("new_lazy_character", optional = TRUE) lazy_character_is_materialized <- import_vctrs("lazy_character_is_materialized", optional = TRUE) if (is.null(new_lazy_character) || is.null(lazy_character_is_materialized)) { skip("Lazy character helpers from vctrs are not available.") } invisible() } new_lazy_character <- function(fn) { f <- import_vctrs("new_lazy_character") f(fn) } lazy_character_is_materialized <- function(x) { f <- import_vctrs("lazy_character_is_materialized") f(x) } dplyr/tests/testthat/test-group-by.R0000644000176200001440000004476714406402754017302 0ustar liggesusersdf <- data.frame(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) test_that("group_by() with .add = TRUE adds groups", { add_groups1 <- function(tbl) group_by(tbl, x, y, .add = TRUE) add_groups2 <- function(tbl) group_by(group_by(tbl, x, .add = TRUE), y, .add = TRUE) expect_equal(group_vars(add_groups1(df)), c("x", "y")) expect_equal(group_vars(add_groups2(df)), c("x", "y")) }) test_that("group_by(, ) computes the expressions on the 
ungrouped data frame (#5938)", { df <- data.frame( x = 1:4, g = rep(1:2, each = 2) ) count <- 0 out <- df %>% group_by(g) %>% group_by(big = { count <<- count + 1; x > mean(x) }) expect_equal(out$big, c(FALSE, FALSE, TRUE, TRUE)) expect_equal(count, 1L) expect_equal(group_vars(out), c("big")) count <- 0 out <- df %>% group_by(g) %>% group_by(big = { count <<- count + 1; x > mean(x) }, .add = TRUE) expect_equal(out$big, c(FALSE, FALSE, TRUE, TRUE)) expect_equal(count, 1L) expect_equal(group_vars(out), c("g", "big")) count <- 0 out <- df %>% group_by(g) %>% mutate(big = { count <<- count + 1; x > mean(x)}) %>% group_by(big) expect_equal(out$big, c(FALSE, TRUE, FALSE, TRUE)) expect_equal(count, 2L) expect_equal(group_vars(out), c("big")) count <- 0 out <- df %>% group_by(g) %>% mutate(big = { count <<- count + 1; x > mean(x)}) %>% group_by(big, .add = TRUE) expect_equal(out$big, c(FALSE, TRUE, FALSE, TRUE)) expect_equal(count, 2L) expect_equal(group_vars(out), c("g", "big")) }) test_that("add = TRUE is deprecated", { rlang::local_options(lifecycle_verbosity = "warning") df <- tibble(x = 1, y = 2) expect_warning( out <- df %>% group_by(x) %>% group_by(y, add = TRUE), "deprecated" ) expect_equal(group_vars(out), c("x", "y")) }) test_that("joins preserve grouping", { df <- data.frame(x = rep(1:2, each = 4), y = rep(1:4, each = 2)) g <- group_by(df, x) expect_equal(group_vars(inner_join(g, g, by = c("x", "y"), relationship = "many-to-many")), "x") expect_equal(group_vars(left_join(g, g, by = c("x", "y"), relationship = "many-to-many")), "x") expect_equal(group_vars(semi_join(g, g, by = c("x", "y"))), "x") expect_equal(group_vars(anti_join(g, g, by = c("x", "y"))), "x") }) test_that("constructors drops groups", { df <- data.frame(x = 1:3) %>% group_by(x) expect_equal(group_vars(as_tibble(df)), character()) }) test_that("grouping by constant adds column (#410)", { grouped <- group_by(mtcars, "cyl") %>% summarise(foo = n()) expect_equal(names(grouped), c('"cyl"', "foo")) 
expect_equal(nrow(grouped), 1L) }) test_that("can partially `ungroup()` (#6606)", { df <- tibble(g1 = 1:2, g2 = 3:4, x = 5:6) gdf <- group_by(df, g1, g2) expect_identical(ungroup(gdf, g1), group_by(df, g2)) expect_identical(ungroup(gdf, g1, g2), df) }) test_that("can't rename while partially `ungroup()`-ing (#6606)", { df <- tibble(g = 1:2, x = 3:4) gdf <- group_by(df, g) expect_snapshot(error = TRUE, { ungroup(gdf, g2 = g) }) }) test_that(".dots is soft deprecated", { rlang::local_options(lifecycle_verbosity = "warning") df <- tibble(x = 1, y = 1) expect_warning(gf <- group_by(df, .dots = "x"), "deprecated") }) # Test full range of variable types -------------------------------------------- test_that("local group_by preserves variable types", { df_var <- tibble( l = c(T, F), i = 1:2, d = Sys.Date() + 1:2, f = factor(letters[1:2]), num = 1:2 + 0.5, t = Sys.time() + 1:2, c = letters[1:2] ) attr(df_var$t, "tzone") <- "" for (var in names(df_var)) { expected <- tibble(!!var := sort(unique(df_var[[var]])), n = 1L) summarised <- df_var %>% group_by(!!sym(var)) %>% summarise(n = n()) expect_equal(summarised, expected) } }) test_that("mutate does not lose variables (#144)", { df <- tibble(a = rep(1:4, 2), b = rep(1:4, each = 2), x = runif(8)) by_ab <- group_by(df, a, b) by_a <- summarise(by_ab, x = sum(x), .groups = "drop_last") by_a_quartile <- group_by(by_a, quartile = ntile(x, 4)) expect_equal(names(by_a_quartile), c("a", "b", "x", "quartile")) }) test_that("group_by uses shallow copy", { skip_if_not_installed("lobstr") m1 <- group_by(mtcars, cyl) expect_equal(group_vars(mtcars), character()) expect_equal( lobstr::obj_addrs(mtcars), lobstr::obj_addrs(m1) ) }) test_that("group_by orders by groups. 
#242", { df <- data.frame(a = sample(1:10, 3000, replace = TRUE)) %>% group_by(a) expect_equal(group_data(df)$a, 1:10) df <- data.frame(a = sample(letters[1:10], 3000, replace = TRUE), stringsAsFactors = FALSE) %>% group_by(a) expect_equal(group_data(df)$a, letters[1:10]) df <- data.frame(a = sample(sqrt(1:10), 3000, replace = TRUE)) %>% group_by(a) expect_equal(group_data(df)$a, sqrt(1:10)) }) test_that("Can group_by() a POSIXlt", { skip_if_not_installed("tibble", "2.99.99") df <- tibble(x = 1:5, times = as.POSIXlt(seq.Date(Sys.Date(), length.out = 5, by = "day"))) g <- group_by(df, times) expect_equal(nrow(group_data(g)), 5L) }) test_that("group_by() handles list as grouping variables", { df <- tibble(x = 1:3, y = list(1:2, 1:3, 1:2)) gdata <- group_data(group_by(df, y)) expect_equal(nrow(gdata), 2L) expect_equal(gdata$y, list(1:2, 1:3)) expect_equal(gdata$.rows, list_of(c(1L, 3L), 2L)) }) test_that("select(group_by(.)) implicitly adds grouping variables (#170)", { expect_snapshot( res <- mtcars %>% group_by(vs) %>% select(mpg) ) expect_equal(names(res), c("vs", "mpg")) }) test_that("group_by only creates one group for NA (#401)", { x <- as.numeric(c(NA, NA, NA, 10:1, 10:1)) w <- c(20, 30, 40, 1:10, 1:10) * 10 n_distinct(x) # 11 OK res <- data.frame(x = x, w = w) %>% group_by(x) %>% summarise(n = n()) expect_equal(nrow(res), 11L) }) test_that("there can be 0 groups (#486)", { data <- tibble(a = numeric(0), g = character(0)) %>% group_by(g) expect_equal(length(data$a), 0L) expect_equal(length(data$g), 0L) expect_equal(map_int(group_rows(data), length), integer(0)) }) test_that("group_by works with zero-row data frames (#486)", { df <- data.frame(a = numeric(0), b = numeric(0), g = character(0)) dfg <- group_by(df, g, .drop = FALSE) expect_equal(dim(dfg), c(0, 3)) expect_equal(group_vars(dfg), "g") expect_equal(group_size(dfg), integer(0)) x <- summarise(dfg, n = n()) expect_equal(dim(x), c(0, 2)) expect_equal(group_vars(x), character()) x <- mutate(dfg, c = b + 1) 
expect_equal(dim(x), c(0, 4)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) x <- filter(dfg, a == 100) expect_equal(dim(x), c(0, 3)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) x <- arrange(dfg, a, g) expect_equal(dim(x), c(0, 3)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) expect_snapshot( x <- select(dfg, a) # Only select 'a' column; should result in 'g' and 'a' ) expect_equal(dim(x), c(0, 2)) expect_equal(group_vars(x), "g") expect_equal(group_size(x), integer(0)) }) test_that("[ on grouped_df preserves grouping if subset includes grouping vars", { df <- tibble(x = 1:5, ` ` = 6:10) by_x <- df %>% group_by(x) expect_equal(by_x %>% groups(), by_x %>% `[`(1:2) %>% groups()) # non-syntactic name by_ns <- df %>% group_by(` `) expect_equal(by_ns %>% groups(), by_ns %>% `[`(1:2) %>% groups()) }) test_that("[ on grouped_df drops grouping if subset doesn't include grouping vars", { by_cyl <- mtcars %>% group_by(cyl) no_cyl <- by_cyl %>% `[`(c(1, 3)) expect_equal(group_vars(no_cyl), character()) expect_s3_class(no_cyl, "tbl_df") }) test_that("group_by works after arrange (#959)", { df <- tibble(Log = c(1, 2, 1, 2, 1, 2), Time = c(10, 1, 3, 0, 15, 11)) res <- df %>% arrange(Time) %>% group_by(Log) %>% mutate(Diff = Time - lag(Time)) expect_true(all(is.na(res$Diff[c(1, 3)]))) expect_equal(res$Diff[c(2, 4, 5, 6)], c(1, 7, 10, 5)) }) test_that("group_by keeps attributes", { d <- data.frame(x = structure(1:10, foo = "bar")) gd <- group_by(d) expect_equal(attr(gd$x, "foo"), "bar") }) test_that("ungroup.rowwise_df gives a tbl_df (#936)", { res <- mtcars %>% rowwise() %>% ungroup() %>% class() expect_equal(res, c("tbl_df", "tbl", "data.frame")) }) test_that(paste0("group_by handles encodings for native strings (#1507)"), { local_non_utf8_encoding() special <- get_native_lang_string() df <- data.frame(x = 1:3, Eng = 2:4) for (names_converter in c(enc2native, enc2utf8)) { for (dots_converter in 
c(enc2native, enc2utf8)) { names(df) <- names_converter(c(special, "Eng")) res <- group_by(df, !!!syms(dots_converter(special))) expect_equal(names(res), names(df)) expect_equal(group_vars(res), special) } } for (names_converter in c(enc2native, enc2utf8)) { names(df) <- names_converter(c(special, "Eng")) res <- group_by(df, !!!special) expect_equal(names(res), c(names(df), deparse(special))) expect_equal(groups(res), list(as.name(enc2native(deparse(special))))) } }) test_that("group_by handles raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(ungroup(group_by(df, a)), df) expect_identical(ungroup(group_by(df, b)), df) }) test_that("rowwise handles raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_s3_class(rowwise(df), "rowwise_df") }) test_that("group_by() names pronouns correctly (#2686)", { expect_named(group_by(tibble(x = 1), .data$x), "x") expect_named(group_by(tibble(x = 1), .data[["x"]]), "x") }) test_that("group_by() does not affect input data (#3028)", { x <- data.frame(old1 = c(1, 2, 3), old2 = c(4, 5, 6)) %>% group_by(old1) y <- x %>% select(new1 = old1, new2 = old2) expect_identical(groups(x), syms(quote(old1))) }) test_that("group_by() does not mutate for nothing when using the .data pronoun (#2752, #3533)", { expect_identical( iris %>% group_by(Species) %>% group_by(.data$Species), iris %>% group_by(Species) ) expect_identical( iris %>% group_by(Species) %>% group_by(.data[["Species"]]), iris %>% group_by(Species) ) df <- tibble(x = 1:5) attr(df, "y") <- 1 expect_equal( df %>% group_by(.data$x) %>% attr("y"), 1 ) expect_equal( df %>% group_by(.data[["x"]]) %>% attr("y"), 1 ) }) test_that("tbl_sum gets the right number of groups", { res <- data.frame(x=c(1,1,2,2)) %>% group_by(x) %>% pillar::tbl_sum() expect_equal(res, c("A tibble" = "4 x 1", "Groups" = "x [2]")) }) test_that("group_by ignores empty quosures (3780)", { empty <- quo() expect_equal(group_by(mtcars, cyl), group_by(mtcars, cyl, !!empty)) }) 
# Zero groups ---------------------------------------------------

# The #3935 family of tests checks that every core verb copes with a grouped
# tibble whose grouping column is empty (0 rows => 0 groups).

test_that("mutate handles grouped tibble with 0 groups (#3935)", {
  df <- tibble(x = integer()) %>% group_by(x)
  res <- mutate(df, y = mean(x), z = +mean(x), n = n())
  expect_equal(names(res), c("x", "y", "z", "n"))
  expect_equal(nrow(res), 0L)
  # mean() yields double even on integer input; n() stays integer.
  expect_equal(res$y, double())
  expect_equal(res$z, double())
  expect_equal(res$n, integer())
})

test_that("summarise handles grouped tibble with 0 groups (#3935)", {
  df <- tibble(x = integer()) %>% group_by(x)
  res <- summarise(df, y = mean(x), z = +mean(x), n = n())
  expect_equal(names(res), c("x", "y", "z", "n"))
  expect_equal(nrow(res), 0L)
  # Same column-type expectations as the mutate test above, in column order.
  expect_equal(res$y, double())
  expect_equal(res$z, double())
  expect_equal(res$n, integer())
})

test_that("filter handles grouped tibble with 0 groups (#3935)", {
  df <- tibble(x = integer()) %>% group_by(x)
  res <- filter(df, x > 3L)
  # Filtering an empty grouped tibble must be a no-op, grouping included.
  expect_identical(df, res)
})

test_that("select handles grouped tibble with 0 groups (#3935)", {
  df <- tibble(x = integer()) %>% group_by(x)
  res <- select(df, x)
  expect_identical(df, res)
})

test_that("arrange handles grouped tibble with 0 groups (#3935)", {
  df <- tibble(x = integer()) %>% group_by(x)
  res <- arrange(df, x)
  expect_identical(df, res)
})

test_that("group_by() with empty spec produces a grouped data frame with 0 grouping variables", {
  # With no grouping variables, group_data() has only the `.rows` column,
  # holding a single group that spans every row.
  gdata <- group_data(group_by(iris))
  expect_equal(names(gdata), ".rows")
  expect_equal(gdata$.rows, list_of(seq_len(nrow(iris))))

  # Splicing an empty list of quosures is equivalent to an empty spec.
  gdata <- group_data(group_by(iris, !!!list()))
  expect_equal(names(gdata), ".rows")
  expect_equal(gdata$.rows, list_of(seq_len(nrow(iris))))
})

# .drop = TRUE ---------------------------------------------------

test_that("group_by(.drop = TRUE) drops empty groups (4061)", {
  res <- iris %>%
    filter(Species == "setosa") %>%
    group_by(Species, .drop = TRUE)
  # Only the "setosa" level should survive as a group, but the factor itself
  # keeps all three levels; the `.drop` marker is stored on group_data().
  expect_identical(
    group_data(res),
    structure(
      tibble(
        Species = factor("setosa", levels = levels(iris$Species)),
        .rows := list_of(1:50)
      ),
      .drop = TRUE
    )
  )
  expect_true(group_by_drop_default(res))
})
test_that("grouped data frames remember their .drop (#4061)", { res <- iris %>% filter(Species == "setosa") %>% group_by(Species, .drop = TRUE) res2 <- res %>% filter(Sepal.Length > 5) expect_true(group_by_drop_default(res2)) res3 <- res %>% filter(Sepal.Length > 5, .preserve = FALSE) expect_true(group_by_drop_default(res3)) res4 <- res3 %>% group_by(Species) expect_true(group_by_drop_default(res4)) expect_equal(nrow(group_data(res4)), 1L) }) test_that("grouped data frames remember their .drop = FALSE (#4337)", { res <- iris %>% filter(Species == "setosa") %>% group_by(Species, .drop = FALSE) expect_false(group_by_drop_default(res)) res2 <- res %>% group_by(Species) expect_false(group_by_drop_default(res2)) }) test_that("group_by(.drop = FALSE) preserve ordered factors (#5455)", { df <- tibble(x = ordered("x")) drop <- df %>% group_by(x) %>% group_data() nodrop <- df %>% group_by(x, .drop = FALSE) %>% group_data() expect_equal(is.ordered(drop$x), is.ordered(nodrop$x)) expect_true(is.ordered(nodrop$x)) }) test_that("summarise maintains the .drop attribute (#4061)", { df <- tibble( f1 = factor("a", levels = c("a", "b", "c")), f2 = factor("d", levels = c("d", "e", "f", "g")), x = 42 ) res <- df %>% group_by(f1, f2, .drop = TRUE) expect_equal(n_groups(res), 1L) res2 <- summarise(res, x = sum(x), .groups = "drop_last") expect_equal(n_groups(res2), 1L) expect_true(group_by_drop_default(res2)) }) test_that("joins maintain the .drop attribute (#4061)", { df1 <- group_by(tibble( f1 = factor(c("a", "b"), levels = c("a", "b", "c")), x = 42:43 ), f1, .drop = TRUE) df2 <- group_by(tibble( f1 = factor(c("a"), levels = c("a", "b", "c")), y = 1 ), f1, .drop = TRUE) res <- left_join(df1, df2, by = "f1") expect_equal(n_groups(res), 2L) df2 <- group_by(tibble( f1 = factor(c("a", "c"), levels = c("a", "b", "c")), y = 1:2 ), f1, .drop = TRUE) res <- full_join(df1, df2, by = "f1") expect_equal(n_groups(res), 3L) }) test_that("group_by(add = TRUE) sets .drop if the origonal data was 
.drop", { d <- tibble( f1 = factor("b", levels = c("a", "b", "c")), f2 = factor("g", levels = c("e", "f", "g")), x = 48 ) res <- group_by(group_by(d, f1, .drop = TRUE), f2, .add = TRUE) expect_equal(n_groups(res), 1L) expect_true(group_by_drop_default(res)) }) test_that("group_by_drop_default() is forgiving about corrupt grouped df (#4306)",{ df <- tibble(x = 1:2, y = 1:2) %>% structure(class = c("grouped_df", "tbl_df", "tbl", "data.frame")) expect_true(group_by_drop_default(df)) }) test_that("group_by() puts NA groups last in STRSXP (#4227)", { res <- tibble(x = c("apple", NA, "banana"), y = 1:3) %>% group_by(x) %>% group_data() expect_identical(res$x, c("apple", "banana", NA_character_)) expect_identical(res$.rows, list_of(1L, 3L, 2L)) }) test_that("group_by() does not create arbitrary NA groups for factors when drop = TRUE (#4460)", { res <- expect_warning(group_data(group_by(iris, Species)[0, ]), NA) expect_equal(nrow(res), 0L) res <- expect_warning(group_data(group_by(iris[0, ], Species)), NA) expect_equal(nrow(res), 0L) }) test_that("group_by() can handle auto splicing in the mutate() step", { expect_identical( iris %>% group_by(Species), iris %>% group_by(data.frame(Species = Species)) ) expect_identical( iris %>% group_by(Species), iris %>% group_by(pick(Species)) ) expect_identical( iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width), iris %>% group_by(across(starts_with("Sepal"), round)) ) }) test_that("group_by() can combine usual spec and auto-splicing-mutate() step", { expect_identical( iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Sepal.Length, Sepal.Width, Species), iris %>% group_by(across(starts_with("Sepal"), round), Species) ) expect_identical( iris %>% mutate(across(starts_with("Sepal"), round)) %>% group_by(Species, Sepal.Length, Sepal.Width), iris %>% group_by(Species, across(starts_with("Sepal"), round)) ) }) # mutate() semantics test_that("group_by() has mutate() semantics 
(#4984)", { expect_equal( tibble(a = 1, b = 2) %>% group_by(c = a * b, d = c + 1), tibble(a = 1, b = 2) %>% mutate(c = a * b, d = c + 1) %>% group_by(c, d) ) }) test_that("implicit mutate() operates on ungrouped data (#5598)", { vars <- tibble(x = c(1,2), y = c(3,4), z = c(5,6)) %>% dplyr::group_by(y) %>% dplyr::group_by(pick(any_of(c('y','z')))) %>% dplyr::group_vars() expect_equal(vars, c("y", "z")) }) test_that("grouped_df() does not break row.names (#5745)", { groups <- compute_groups(data.frame(x = 1:10), "x") expect_equal(.row_names_info(groups, type = 0), c(NA, -10L)) }) test_that("group_by() keeps attributes unrelated to the grouping (#5760)", { d <- data.frame(x = 453, y = 642) attr(d, "foo") <- "bar" d2 <- group_by(d, x) expect_equal(attr(d2, "foo"), "bar") d3 <- group_by(d2, y, .add = TRUE) expect_equal(attr(d2, "foo"), "bar") d4 <- group_by(d2, y2 = y * 2, .add = TRUE) expect_equal(attr(d2, "foo"), "bar") }) test_that("group_by() works with quosures (tidyverse/lubridate#959)", { ignore <- function(...) 
NA f <- function(var) { tibble(x = 1) %>% group_by(g = ignore({{ var }})) } g <- function(var) { # This used to fail with the extra argument tibble(x = 1) %>% group_by(g = ignore({{ var }}, 1)) } expect_equal(f(), tibble(x = 1, g = NA) %>% group_by(g)) expect_equal(g(), tibble(x = 1, g = NA) %>% group_by(g)) }) # Errors ------------------------------------------------------------------ test_that("group_by() and ungroup() give meaningful error messages", { expect_snapshot({ df <- tibble(x = 1, y = 2) (expect_error(df %>% group_by(unknown))) (expect_error(df %>% ungroup(x))) (expect_error(df %>% group_by(x, y) %>% ungroup(z))) (expect_error(df %>% group_by(z = a + 1))) }) }) dplyr/tests/testthat/test-summarise.R0000644000176200001440000004051114472225345017524 0ustar liggesuserstest_that("can use freshly create variables (#138)", { df <- tibble(x = 1:10) out <- summarise(df, y = mean(x), z = y + 1) expect_equal(out$y, 5.5) expect_equal(out$z, 6.5) }) test_that("inputs are recycled (deprecated in 1.1.0)", { local_options(lifecycle_verbosity = "quiet") expect_equal( tibble() %>% summarise(x = 1, y = 1:3, z = 1), tibble(x = 1, y = 1:3, z = 1) ) gf <- group_by(tibble(a = 1:2), a) expect_equal( gf %>% summarise(x = 1, y = 1:3, z = 1), tibble(a = rep(1:2, each = 3), x = 1, y = c(1:3, 1:3), z = 1) %>% group_by(a) ) expect_equal( gf %>% summarise(x = seq_len(a), y = 1), tibble(a = c(1L, 2L, 2L), x = c(1L, 1L, 2L), y = 1) %>% group_by(a) ) }) test_that("works with empty data frames", { # 0 rows df <- tibble(x = integer()) expect_equal(summarise(df), tibble(.rows = 1)) expect_equal(summarise(df, n = n(), sum = sum(x)), tibble(n = 0, sum = 0)) # 0 cols df <- tibble(.rows = 10) expect_equal(summarise(df), tibble(.rows = 1)) expect_equal(summarise(df, n = n()), tibble(n = 10)) }) test_that("works with grouped empty data frames", { df <- tibble(x = integer()) expect_equal( df %>% group_by(x) %>% summarise(y = 1L), tibble(x = integer(), y = integer()) ) expect_equal( df %>% 
rowwise(x) %>% summarise(y = 1L), group_by(tibble(x = integer(), y = integer()), x) ) }) test_that("no expressions yields grouping data", { df <- tibble(x = 1:2, y = 1:2) gf <- group_by(df, x) expect_equal(summarise(df), tibble(.rows = 1)) expect_equal(summarise(gf), tibble(x = 1:2)) expect_equal(summarise(df, !!!list()), tibble(.rows = 1)) expect_equal(summarise(gf, !!!list()), tibble(x = 1:2)) }) test_that("preserved class, but not attributes", { df <- structure( data.frame(x = 1:10, g1 = rep(1:2, each = 5), g2 = rep(1:5, 2)), meta = "this is important" ) out <- df %>% summarise(n = n()) expect_s3_class(out, "data.frame", exact = TRUE) expect_null(attr(out, "res")) out <- df %>% group_by(g1) %>% summarise(n = n()) # expect_s3_class(out, "data.frame", exact = TRUE) expect_null(attr(out, "res")) }) test_that("works with unquoted values", { df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) expect_equal(summarise(df, out = !!1), tibble(out = 1)) expect_equal(summarise(df, out = !!quo(1)), tibble(out = 1)) }) test_that("formulas are evaluated in the right environment (#3019)", { out <- mtcars %>% summarise(fn = list(rlang::as_function(~ list(~foo, environment())))) out <- out$fn[[1]]() expect_identical(environment(out[[1]]), out[[2]]) }) test_that("unnamed data frame results with 0 columns are ignored (#5084)", { df1 <- tibble(x = 1:2) expect_equal(df1 %>% group_by(x) %>% summarise(data.frame()), df1) expect_equal(df1 %>% group_by(x) %>% summarise(data.frame(), y = 65), mutate(df1, y = 65)) expect_equal(df1 %>% group_by(x) %>% summarise(y = 65, data.frame()), mutate(df1, y = 65)) df2 <- tibble(x = 1:2, y = 3:4) expect_equal(df2 %>% group_by(x) %>% summarise(data.frame()), df1) expect_equal(df2 %>% group_by(x) %>% summarise(data.frame(), z = 98), mutate(df1, z = 98)) expect_equal(df2 %>% group_by(x) %>% summarise(z = 98, data.frame()), mutate(df1, z = 98)) # This includes unnamed data frames that have 0 columns but >0 rows. # Noted when working on (#6509). 
empty3 <- new_tibble(list(), nrow = 3L) expect_equal(df1 %>% summarise(empty3), new_tibble(list(), nrow = 1L)) expect_equal(df1 %>% summarise(empty3, y = mean(x)), df1 %>% summarise(y = mean(x))) expect_equal(df1 %>% group_by(x) %>% summarise(empty3), df1) expect_equal(df1 %>% group_by(x) %>% summarise(empty3, y = x + 1), mutate(df1, y = x + 1)) }) test_that("named data frame results with 0 columns participate in recycling (#6509)", { local_options(lifecycle_verbosity = "quiet") df <- tibble(x = 1:3) gdf <- group_by(df, x) empty <- tibble() expect_identical(summarise(df, empty = empty), tibble(empty = empty)) expect_identical(summarise(df, x = sum(x), empty = empty), tibble(x = integer(), empty = empty)) expect_identical(summarise(df, empty = empty, x = sum(x)), tibble(empty = empty, x = integer())) empty3 <- new_tibble(list(), nrow = 3L) expect_identical(summarise(df, empty = empty3), tibble(empty = empty3)) expect_identical(summarise(df, x = sum(x), empty = empty3), tibble(x = c(6L, 6L, 6L), empty = empty3)) expect_identical(summarise(df, empty = empty3, x = sum(x)), tibble(empty = empty3, x = c(6L, 6L, 6L))) expect_identical( summarise(gdf, empty = empty, .groups = "drop"), tibble(x = integer(), empty = empty) ) expect_identical( summarise(gdf, y = x + 1L, empty = empty, .groups = "drop"), tibble(x = integer(), y = integer(), empty = empty) ) expect_identical( summarise(gdf, empty = empty3, .groups = "drop"), tibble(x = vec_rep_each(1:3, 3), empty = vec_rep(empty3, 3)) ) expect_identical( summarise(gdf, y = x + 1L, empty = empty3, .groups = "drop"), tibble(x = vec_rep_each(1:3, 3), y = vec_rep_each(2:4, 3), empty = vec_rep(empty3, 3)) ) }) test_that("can't overwrite column active bindings (#6666)", { skip_if(getRversion() < "3.6.3", message = "Active binding error changed") df <- tibble(g = c(1, 1, 2, 2), x = 1:4) gdf <- group_by(df, g) # The error seen here comes from trying to `<-` to an active binding when # the active binding function has 0 arguments. 
expect_snapshot(error = TRUE, { summarise(df, y = { x <<- x + 2L mean(x) }) }) expect_snapshot(error = TRUE, { summarise(df, .by = g, y = { x <<- x + 2L mean(x) }) }) expect_snapshot(error = TRUE, { summarise(gdf, y = { x <<- x + 2L mean(x) }) }) }) test_that("assigning with `<-` doesn't affect the mask (#6666)", { df <- tibble(g = c(1, 1, 2, 2), x = 1:4) gdf <- group_by(df, g) out <- summarise(df, .by = g, y = { x <- x + 4L mean(x) }) expect_identical(out$y, c(5.5, 7.5)) out <- summarise(gdf, y = { x <- x + 4L mean(x) }) expect_identical(out$y, c(5.5, 7.5)) }) test_that("summarise() correctly auto-names expressions (#6741)", { df <- tibble(a = 1:3) expect_identical(summarise(df, min(-a)), tibble("min(-a)" = -3L)) }) # grouping ---------------------------------------------------------------- test_that("peels off a single layer of grouping", { df <- tibble(x = rep(1:4, each = 4), y = rep(1:2, each = 8), z = runif(16)) gf <- df %>% group_by(x, y) expect_equal(group_vars(summarise(gf)), "x") expect_equal(group_vars(summarise(summarise(gf))), character()) }) test_that("correctly reconstructs groups", { d <- tibble(x = 1:4, g1 = rep(1:2, 2), g2 = 1:4) %>% group_by(g1, g2) %>% summarise(x = x + 1) expect_equal(group_rows(d), list_of(1:2, 3:4)) }) test_that("can modify grouping variables", { df <- tibble(a = c(1, 2, 1, 2), b = c(1, 1, 2, 2)) gf <- group_by(df, a, b) i <- count_regroups(out <- summarise(gf, a = a + 1)) expect_equal(i, 1) expect_equal(out$a, c(2, 2, 3, 3)) }) test_that("summarise returns a row for zero length groups", { df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) expect_equal( nrow(summarise(df, z = n())), 3L) }) test_that("summarise respects zero-length groups (#341)", { df <- tibble(x = factor(rep(1:3, each = 10), levels = 1:4)) out <- df %>% group_by(x, .drop = FALSE) %>% summarise(n = n()) expect_equal(out$n, c(10L, 10L, 10L, 0L)) }) # vector types 
---------------------------------------------------------- test_that("summarise allows names (#2675)", { data <- tibble(a = 1:3) %>% summarise(b = c("1" = a[[1]])) expect_equal(names(data$b), "1") data <- tibble(a = 1:3) %>% rowwise() %>% summarise(b = setNames(nm = a)) expect_equal(names(data$b), c("1", "2", "3")) data <- tibble(a = c(1, 1, 2)) %>% group_by(a) %>% summarise(b = setNames(nm = a[[1]])) expect_equal(names(data$b), c("1", "2")) res <- data.frame(x = c(1:3), y = letters[1:3]) %>% group_by(y) %>% summarise( a = length(x), b = quantile(x, 0.5) ) expect_equal(res$b, c("50%" = 1, "50%" = 2, "50%" = 3)) }) test_that("summarise handles list output columns (#832)", { df <- tibble(x = 1:10, g = rep(1:2, each = 5)) res <- df %>% group_by(g) %>% summarise(y = list(x)) expect_equal(res$y[[1]], 1:5) # preserving names d <- tibble(x = rep(1:3, 1:3), y = 1:6, names = letters[1:6]) res <- d %>% group_by(x) %>% summarise(y = list(setNames(y, names))) expect_equal(names(res$y[[1]]), letters[[1]]) }) test_that("summarise coerces types across groups", { gf <- group_by(tibble(g = 1:2), g) out <- summarise(gf, x = if (g == 1) NA else "x") expect_type(out$x, "character") out <- summarise(gf, x = if (g == 1L) NA else 2.5) expect_type(out$x, "double") }) test_that("unnamed tibbles are unpacked (#2326)", { df <- tibble(x = 2) out <- summarise(df, tibble(y = x * 2, z = 3)) expect_equal(out$y, 4) expect_equal(out$z, 3) }) test_that("named tibbles are packed (#2326)", { df <- tibble(x = 2) out <- summarise(df, df = tibble(y = x * 2, z = 3)) expect_equal(out$df, tibble(y = 4, z = 3)) }) test_that("summarise(.groups=) in global environment", { expect_message(eval_bare( expr(data.frame(x = 1, y = 2) %>% group_by(x, y) %>% summarise()), env(global_env()) )) expect_message(eval_bare( expr(data.frame(x = 1, y = 2) %>% rowwise(x, y) %>% summarise()), env(global_env()) )) }) test_that("summarise(.groups=)", { df <- data.frame(x = 1, y = 2) expect_equal(df %>% summarise(z = 3, .groups= 
"rowwise"), rowwise(data.frame(z = 3))) gf <- df %>% group_by(x, y) expect_equal(gf %>% summarise() %>% group_vars(), "x") expect_equal(gf %>% summarise(.groups = "drop_last") %>% group_vars(), "x") expect_equal(gf %>% summarise(.groups = "drop") %>% group_vars(), character()) expect_equal(gf %>% summarise(.groups = "keep") %>% group_vars(), c("x", "y")) rf <- df %>% rowwise(x, y) expect_equal(rf %>% summarise(.groups = "drop") %>% group_vars(), character()) expect_equal(rf %>% summarise(.groups = "keep") %>% group_vars(), c("x", "y")) }) test_that("summarise() casts data frame results to common type (#5646)", { df <- data.frame(x = 1:2, g = 1:2) %>% group_by(g) res <- df %>% summarise(if (g == 1) data.frame(y = 1) else data.frame(y = 1, z = 2), .groups = "drop") expect_equal(res$z, c(NA, 2)) }) test_that("summarise() silently skips when all results are NULL (#5708)", { df <- data.frame(x = 1:2, g = 1:2) %>% group_by(g) expect_equal(summarise(df, x = NULL), summarise(df)) expect_error(summarise(df, x = if(g == 1) 42)) }) # .by ---------------------------------------------------------------------- test_that("can group transiently using `.by`", { df <- tibble(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3)) out <- summarise(df, x = mean(x), .by = g) expect_identical(out$g, c(1, 2)) expect_identical(out$x, c(3, 2)) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping retains bare data.frame class", { df <- data.frame(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3)) out <- summarise(df, x = mean(x), .by = g) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping drops data frame attributes", { # Because `summarise()` theoretically creates a "new" data frame # With data.frames or tibbles df <- data.frame(g = c(1, 1, 2), x = c(1, 2, 1)) tbl <- as_tibble(df) attr(df, "foo") <- "bar" attr(tbl, "foo") <- "bar" out <- summarise(df, x = mean(x), .by = g) expect_null(attr(out, "foo")) out <- summarise(tbl, x = mean(x), .by = g) 
expect_null(attr(out, "foo")) }) test_that("transient grouping orders by first appearance", { df <- tibble(g = c(2, 1, 2, 0), x = c(4, 2, 8, 5)) out <- summarise(df, x = mean(x), .by = g) expect_identical(out$g, c(2, 1, 0)) expect_identical(out$x, c(6, 2, 5)) }) test_that("can't use `.by` with `.groups`", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { summarise(df, .by = x, .groups = "drop") }) }) test_that("catches `.by` with grouped-df", { df <- tibble(x = 1) gdf <- group_by(df, x) expect_snapshot(error = TRUE, { summarise(gdf, .by = x) }) }) test_that("catches `.by` with rowwise-df", { df <- tibble(x = 1) rdf <- rowwise(df) expect_snapshot(error = TRUE, { summarise(rdf, .by = x) }) }) # errors ------------------------------------------------------------------- test_that("summarise() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) stack <<- sys.calls(), summarise(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("`summarise()` doesn't allow data frames with missing or empty names (#6758)", { df1 <- new_data_frame(set_names(list(1), "")) df2 <- new_data_frame(set_names(list(1), NA_character_)) expect_snapshot(error = TRUE, { summarise(df1) }) expect_snapshot(error = TRUE, { summarise(df2) }) }) test_that("summarise() gives meaningful errors", { eval(envir = global_env(), expr({ expect_snapshot({ # Messages about .groups= tibble(x = 1, y = 2) %>% group_by(x, y) %>% summarise() tibble(x = 1, y = 2) %>% rowwise(x, y) %>% summarise() tibble(x = 1, y = 2) %>% rowwise() %>% summarise() }) })) eval(envir = global_env(), expr({ expect_snapshot({ # unsupported type (expect_error( tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% summarise(a = rlang::env(a = 1)) )) (expect_error( tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% group_by(x, y) %>% summarise(a = rlang::env(a = 1)) )) (expect_error( tibble(x = 1, y = c(1, 2, 2), z = runif(3)) 
%>% rowwise() %>% summarise(a = lm(y ~ x)) )) # mixed types (expect_error( tibble(id = 1:2, a = list(1, "2")) %>% group_by(id) %>% summarise(a = a[[1]]) )) (expect_error( tibble(id = 1:2, a = list(1, "2")) %>% rowwise() %>% summarise(a = a[[1]]) )) # incompatible size (expect_error( tibble(z = 1) %>% summarise(x = 1:3, y = 1:2) )) (expect_error( tibble(z = 1:2) %>% group_by(z) %>% summarise(x = 1:3, y = 1:2) )) (expect_error( tibble(z = c(1, 3)) %>% group_by(z) %>% summarise(x = seq_len(z), y = 1:2) )) # mixed nulls (expect_error( data.frame(x = 1:2, g = 1:2) %>% group_by(g) %>% summarise(x = if(g == 1) 42) )) (expect_error( data.frame(x = 1:2, g = 1:2) %>% group_by(g) %>% summarise(x = if(g == 2) 42) )) # .data pronoun (expect_error(summarise(tibble(a = 1), c = .data$b))) (expect_error(summarise(group_by(tibble(a = 1:3), a), c = .data$b))) # Duplicate column names (expect_error( tibble(x = 1, x = 1, .name_repair = "minimal") %>% summarise(x) )) # Not glue()ing (expect_error(tibble() %>% summarise(stop("{")))) (expect_error( tibble(a = 1, b="{value:1, unit:a}") %>% group_by(b) %>% summarise(a = stop("!")) )) }) })) }) test_that("non-summary results are deprecated in favor of `reframe()` (#6382)", { local_options(lifecycle_verbosity = "warning") df <- tibble(g = c(1, 1, 2), x = 1:3) gdf <- group_by(df, g) rdf <- rowwise(df) expect_snapshot({ out <- summarise(df, x = which(x < 3)) }) expect_identical(out$x, 1:2) expect_snapshot({ out <- summarise(df, x = which(x < 3), .by = g) }) expect_identical(out$g, c(1, 1)) expect_identical(out$x, 1:2) # First group returns size 2 summary expect_snapshot({ out <- summarise(gdf, x = which(x < 3)) }) expect_identical(out$g, c(1, 1)) expect_identical(out$x, 1:2) # Last row returns size 0 summary expect_snapshot({ out <- summarise(rdf, x = which(x < 3)) }) expect_identical(out$x, c(1L, 1L)) }) dplyr/tests/testthat/test-relocate.R0000644000176200001440000001266714366556340017334 0ustar liggesusers# 
------------------------------------------------------------------------------ # relocate() test_that(".before and .after relocate individual cols", { df <- tibble(x = 1, y = 2) expect_named(relocate(df, x, .after = y), c("y", "x")) expect_named(relocate(df, y, .before = x), c("y", "x")) }) test_that("can move blocks of variables", { df <- tibble(x = 1, a = "a", y = 2, b = "a") expect_named(relocate(df, where(is.character)), c("a", "b", "x", "y")) expect_named(relocate(df, where(is.character), .after = where(is.numeric)), c("x", "y", "a", "b")) }) test_that("don't lose non-contiguous variables", { df <- tibble(a = 1, b = 1, c = 1, d = 1, e = 1) expect_named(relocate(df, b, .after = c(a, c, e)), c("a", "c", "d", "e", "b")) expect_named(relocate(df, e, .before = c(b, d)), c("a", "e", "b", "c", "d")) }) test_that("no .before/.after moves to front", { df <- tibble(x = 1, y = 2) expect_named(relocate(df, y), c("y", "x")) }) test_that("can only supply one of .before and .after", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { relocate(df, .before = 1, .after = 1) }) }) test_that("before and after are defused with context", { local_fn <- identity expect_identical( names(relocate(mtcars, 3, .before = local_fn(5))), names(relocate(mtcars, 3, .before = 5)) ) expect_identical( names(relocate(mtcars, 3, .after = local_fn(5))), names(relocate(mtcars, 3, .after = 5)) ) }) test_that("relocate() respects order specified by ... 
(#5328)", { df <- tibble(a = 1, x = 1, b = 1, z = 1, y = 1) expect_equal( names(relocate(df, x, y, z, .before = x)), c("a", "x", "y", "z", "b") ) expect_equal( names(relocate(df, x, y, z, .after = last_col())), c("a", "b", "x", "y", "z") ) expect_equal( names(relocate(df, x, a, z)), c("x", "a", "z", "b", "y") ) }) test_that("relocate() can rename (#5569)", { df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a") expect_equal( relocate(df, ffff = f), tibble(ffff = "a", a = 1, b = 1, c = 1, d = "a", e = "a") ) expect_equal( relocate(df, ffff = f, .before = c), tibble(a = 1, b = 1, ffff = "a", c = 1, d = "a", e = "a") ) expect_equal( relocate(df, ffff = f, .after = c), tibble(a = 1, b = 1, c = 1, ffff = "a", d = "a", e = "a") ) }) test_that("`relocate()` retains the last duplicate when renaming while moving (#6209)", { # To enforce the invariant that `ncol(.data) == ncol(relocate(.data, ...))`. # Also matches `rename()` behavior. df <- tibble(x = 1) expect_named(relocate(df, a = x, b = x), "b") expect_identical( relocate(df, a = x, b = x), rename(df, a = x, b = x) ) df <- tibble(x = 1, y = 2) expect_named(relocate(df, a = x, b = y, c = x), c("b", "c")) expect_identical( relocate(df, a = x, b = y, c = x), select(rename(df, a = x, b = y, c = x), b, c) ) }) test_that("attributes of bare data frames are retained (#6341)", { # We require `[` methods to be in charge of keeping extra attributes for all # data frame subclasses (except for data.tables) df <- vctrs::data_frame(x = 1, y = 2) attr(df, "foo") <- "bar" out <- relocate(df, y, .before = x) expect_identical(attr(out, "foo"), "bar") }) # ------------------------------------------------------------------------------ # eval_relocate() test_that("works with zero column data frames (#6167)", { data <- tibble() expr <- expr(any_of("b")) expect_identical( eval_relocate(expr, data), set_names(integer()) ) }) test_that("works with `before` and `after` `everything()`", { data <- tibble(w = 1, x = 2, y = 3, z = 4) expr <- 
expr(c(y, z)) expr_everything <- expr(everything()) expect_identical( eval_relocate(expr, data, before = expr_everything), c(y = 3L, z = 4L, w = 1L, x = 2L) ) expect_identical( eval_relocate(expr, data, after = expr_everything), c(w = 1L, x = 2L, y = 3L, z = 4L) ) }) test_that("moves columns to the front when neither `before` nor `after` are specified", { data <- tibble(x = 1, y = 2, z = 3) expr <- expr(c(z, y)) expect_identical( eval_relocate(expr, data), c(z = 3L, y = 2L, x = 1L) ) }) test_that("Empty `before` selection moves columns to front", { data <- tibble(x = 1, y = 2, z = 3) expr <- expr(y) before <- expr(where(is.character)) expect_identical( eval_relocate(expr, data, before = before), c(y = 2L, x = 1L, z = 3L) ) }) test_that("Empty `after` selection moves columns to end", { data <- tibble(x = 1, y = 2, z = 3) expr <- expr(y) after <- expr(where(is.character)) expect_identical( eval_relocate(expr, data, after = after), c(x = 1L, z = 3L, y = 2L) ) }) test_that("Empty `before` and `after` selections work with 0-col data frames", { data <- tibble() expr <- expr(any_of("a")) expr_is_character <- expr(where(is.character)) expect_identical( eval_relocate(expr, data, before = expr_is_character), set_names(integer()) ) expect_identical( eval_relocate(expr, data, after = expr_is_character), set_names(integer()) ) }) test_that("retains the last duplicate when renaming while moving (#6209)", { # To enforce the invariant that relocating can't change the number of columns data <- tibble(x = 1) expr <- expr(c(a = x, b = x)) expect_identical( eval_relocate(expr, data), c(b = 1L) ) data <- tibble(x = 1, y = 2) expr <- expr(c(a = x, b = y, c = x)) expect_identical( eval_relocate(expr, data), c(b = 2L, c = 1L) ) }) dplyr/tests/testthat/test-src-dbi.R0000644000176200001440000000064114366556340017046 0ustar liggesuserstest_that("can work directly with DBI connection", { skip_if_not_installed("RSQLite") skip_if_not_installed("dbplyr") con <- DBI::dbConnect(RSQLite::SQLite(), 
":memory:") on.exit(DBI::dbDisconnect(con)) df <- tibble(x = 1:10, y = letters[1:10]) df1 <- copy_to(con, df) df2 <- tbl(con, "df") expect_equal(collect(df1), df, ignore_attr = TRUE) expect_equal(collect(df2), df, ignore_attr = TRUE) }) dplyr/tests/testthat/test-colwise-funs.R0000644000176200001440000000037213663216626020141 0ustar liggesuserstest_that("as_fun_list() uses rlang auto-naming", { nms <- names(as_fun_list(list(min, max), env())) # Just check they are labellised as literals enclosed in brackets to # insulate from upstream changes expect_true(all(grepl("^<", nms))) }) dplyr/tests/testthat/test-top-n.R0000644000176200001440000000154514266276767016577 0ustar liggesuserstest_that("top_n returns n rows", { test_df <- data.frame(x = 1:10, y = 11:20) top_four <- test_df %>% top_n(4, y) expect_equal(dim(top_four), c(4, 2)) }) test_that("top_n() handles missing `wt`", { df <- data.frame(x = c(10, 4, 1, 6, 3, 1, 1)) expect_message( regexp = "Selecting by x", expect_identical(top_n(df, 2)$x, c(10, 6)) ) }) test_that("top_n() handles calls", { expect_identical(top_n(mtcars, 2, -disp), top_n(mtcars, -2, disp)) }) test_that("top_n() quotes n", { expect_snapshot(res1 <- top_n(mtcars, n() * .5)) expect_snapshot(res2 <- top_n(mtcars, 16)) expect_identical(res1, res2) }) test_that("top_frac() is a shorthand for top_n(n()*)", { expect_identical(top_n(mtcars, n() * .5, disp), top_frac(mtcars, .5, disp)) expect_snapshot(res1 <- top_n(mtcars, n() * .5)) expect_snapshot(res2 <- top_frac(mtcars, .5)) }) dplyr/tests/testthat/test-filter.R0000644000176200001440000004336214406402754017011 0ustar liggesuserstest_that("filter handles passing ...", { df <- data.frame(x = 1:4) f <- function(...) { x1 <- 4 f1 <- function(y) y filter(df, ..., f1(x1) > x) } g <- function(...) { x2 <- 2 f(x > x2, ...) 
} res <- g() expect_equal(res$x, 3L) df <- group_by(df, x) res <- g() expect_equal(res$x, 3L) }) test_that("filter handles simple symbols", { df <- data.frame(x = 1:4, test = rep(c(T, F), each = 2)) res <- filter(df, test) gdf <- group_by(df, x) res <- filter(gdf, test) h <- function(data) { test2 <- c(T, T, F, F) filter(data, test2) } expect_equal(h(df), df[1:2, ]) f <- function(data, ...) { one <- 1 filter(data, test, x > one, ...) } g <- function(data, ...) { four <- 4 f(data, x < four, ...) } res <- g(df) expect_equal(res$x, 2L) expect_equal(res$test, TRUE) res <- g(gdf) expect_equal(res$x, 2L) expect_equal(res$test, TRUE) }) test_that("filter handlers scalar results", { expect_equal(filter(mtcars, min(mpg) > 0), mtcars, ignore_attr = TRUE) expect_equal(filter(group_by(mtcars, cyl), min(mpg) > 0), group_by(mtcars, cyl)) }) test_that("filter propagates attributes", { date.start <- ISOdate(2010, 01, 01, 0) test <- data.frame(Date = ISOdate(2010, 01, 01, 1:10)) test2 <- test %>% filter(Date < ISOdate(2010, 01, 01, 5)) expect_equal(test$Date[1:4], test2$Date) }) test_that("filter discards NA", { temp <- data.frame( i = 1:5, x = c(NA, 1L, 1L, 0L, 0L) ) res <- filter(temp, x == 1) expect_equal(nrow(res), 2L) }) test_that("date class remains on filter (#273)", { x1 <- x2 <- data.frame( date = seq.Date(as.Date("2013-01-01"), by = "1 days", length.out = 2), var = c(5, 8) ) x1.filter <- x1 %>% filter(as.Date(date) > as.Date("2013-01-01")) x2$date <- x2$date + 1 x2.filter <- x2 %>% filter(as.Date(date) > as.Date("2013-01-01")) expect_equal(class(x1.filter$date), "Date") expect_equal(class(x2.filter$date), "Date") }) test_that("filter handles $ correctly (#278)", { d1 <- tibble( num1 = as.character(sample(1:10, 1000, T)), var1 = runif(1000), ) d2 <- data.frame(num1 = as.character(1:3), stringsAsFactors = FALSE) res1 <- d1 %>% filter(num1 %in% c("1", "2", "3")) res2 <- d1 %>% filter(num1 %in% d2$num1) expect_equal(res1, res2) }) test_that("filter() returns the input data if 
no parameters are given", { expect_identical(filter(mtcars), mtcars) expect_identical(filter(mtcars, !!!list()), mtcars) }) test_that("$ does not end call traversing. #502", { # Suppose some analysis options are set much earlier in the script analysis_opts <- list(min_outcome = 0.25) # Generate some dummy data d <- expand.grid(Subject = 1:3, TrialNo = 1:2, Time = 1:3) %>% as_tibble() %>% arrange(Subject, TrialNo, Time) %>% mutate(Outcome = (1:18 %% c(5, 7, 11)) / 10) # Do some aggregation trial_outcomes <- d %>% group_by(Subject, TrialNo) %>% summarise(MeanOutcome = mean(Outcome), .groups = "drop") left <- filter(trial_outcomes, MeanOutcome < analysis_opts$min_outcome) right <- filter(trial_outcomes, analysis_opts$min_outcome > MeanOutcome) expect_equal(left, right) }) test_that("filter handles POSIXlt", { datesDF <- read.csv(stringsAsFactors = FALSE, text = " X 2014-03-13 16:08:19 2014-03-13 16:16:23 2014-03-13 16:28:28 2014-03-13 16:28:54 ") datesDF$X <- as.POSIXlt(datesDF$X) expect_equal( nrow(filter(datesDF, X > as.POSIXlt("2014-03-13"))), 4 ) }) test_that("filter handles complex vectors (#436)", { d <- data.frame(x = 1:10, y = 1:10 + 2i) expect_equal(filter(d, x < 4)$y, 1:3 + 2i) expect_equal(filter(d, Re(y) < 4)$y, 1:3 + 2i) }) test_that("%in% works as expected (#126)", { df <- tibble(a = c("a", "b", "ab"), g = c(1, 1, 2)) res <- df %>% filter(a %in% letters) expect_equal(nrow(res), 2L) res <- df %>% group_by(g) %>% filter(a %in% letters) expect_equal(nrow(res), 2L) }) test_that("row_number does not segfault with example from #781", { z <- data.frame(a = c(1, 2, 3)) b <- "a" res <- z %>% filter(row_number(b) == 2) expect_equal(nrow(res), 0L) }) test_that("row_number works on 0 length columns (#3454)", { expect_identical( mutate(tibble(), a = row_number()), tibble(a = integer()) ) }) test_that("filter does not alter expression (#971)", { my_filter <- ~ am == 1 expect_equal(my_filter[[2]][[2]], as.name("am")) }) test_that("hybrid evaluation handles $ correctly 
(#1134)", { df <- tibble(x = 1:10, g = rep(1:5, 2)) res <- df %>% group_by(g) %>% filter(x > min(df$x)) expect_equal(nrow(res), 9L) }) test_that("filter correctly handles empty data frames (#782)", { res <- tibble() %>% filter(F) expect_equal(nrow(res), 0L) expect_equal(length(names(res)), 0L) }) test_that("filter(.,TRUE,TRUE) works (#1210)", { df <- data.frame(x = 1:5) res <- filter(df, TRUE, TRUE) expect_equal(res, df) }) test_that("filter, slice and arrange preserves attributes (#1064)", { df <- structure( data.frame(x = 1:10, g1 = rep(1:2, each = 5), g2 = rep(1:5, 2)), meta = "this is important" ) res <- filter(df, x < 5) %>% attr("meta") expect_equal(res, "this is important") res <- filter(df, x < 5, x > 4) %>% attr("meta") expect_equal(res, "this is important") res <- df %>% slice(1:50) %>% attr("meta") expect_equal(res, "this is important") res <- df %>% arrange(x) %>% attr("meta") expect_equal(res, "this is important") }) test_that("filter works with rowwise data (#1099)", { df <- tibble(First = c("string1", "string2"), Second = c("Sentence with string1", "something")) res <- df %>% rowwise() %>% filter(grepl(First, Second, fixed = TRUE)) expect_equal(nrow(res), 1L) expect_equal(df[1, ], ungroup(res)) }) test_that("grouped filter handles indices (#880)", { res <- iris %>% group_by(Species) %>% filter(Sepal.Length > 5) res2 <- mutate(res, Petal = Petal.Width * Petal.Length) expect_equal(nrow(res), nrow(res2)) expect_equal(group_rows(res), group_rows(res2)) expect_equal(group_keys(res), group_keys(res2)) }) test_that("filter(FALSE) handles indices", { out <- mtcars %>% group_by(cyl) %>% filter(FALSE, .preserve = TRUE) %>% group_rows() expect_identical(out, list_of(integer(), integer(), integer(), .ptype = integer())) out <- mtcars %>% group_by(cyl) %>% filter(FALSE, .preserve = FALSE) %>% group_rows() expect_identical(out, list_of(.ptype = integer())) }) test_that("filter handles S4 objects (#1366)", { env <- environment() Numbers <- 
suppressWarnings(setClass( "Numbers", slots = c(foo = "numeric"), contains = "integer", where = env )) setMethod("[", "Numbers", function(x, i, ...){ Numbers(unclass(x)[i, ...], foo = x@foo) }) on.exit(removeClass("Numbers", where = env)) df <- data.frame(x = Numbers(1:10, foo = 10)) res <- filter(df, x > 3) expect_s4_class(res$x, "Numbers") expect_equal(res$x@foo, 10) }) test_that("hybrid lag and default value for string columns work (#1403)", { res <- mtcars %>% mutate(xx = LETTERS[gear]) %>% filter(xx == lag(xx, default = "foo")) xx <- LETTERS[mtcars$gear] ok <- xx == lag(xx, default = "foo") expect_equal(xx[ok], res$xx) res <- mtcars %>% mutate(xx = LETTERS[gear]) %>% filter(xx == lead(xx, default = "foo")) xx <- LETTERS[mtcars$gear] ok <- xx == lead(xx, default = "foo") expect_equal(xx[ok], res$xx) }) # .data and .env tests now in test-hybrid-traverse.R test_that("filter handles raw vectors (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(filter(df, a == 1), tibble(a = 1L, b = as.raw(1))) expect_identical(filter(df, b == 1), tibble(a = 1L, b = as.raw(1))) }) test_that("`vars` attribute is not added if empty (#2772)", { expect_identical(tibble(x = 1:2) %>% filter(x == 1), tibble(x = 1L)) }) test_that("filter handles list columns", { res <- tibble(a=1:2, x = list(1:10, 1:5)) %>% filter(a == 1) %>% pull(x) expect_equal(res, list(1:10)) res <- tibble(a=1:2, x = list(1:10, 1:5)) %>% group_by(a) %>% filter(a == 1) %>% pull(x) expect_equal(res, list(1:10)) }) test_that("hybrid function row_number does not trigger warning in filter (#3750)", { out <- tryCatch({ mtcars %>% filter(row_number() > 1, row_number() < 5); TRUE }, warning = function(w) FALSE ) expect_true(out) }) test_that("filter() preserve order across groups (#3989)", { df <- tibble(g = c(1, 2, 1, 2, 1), time = 5:1, x = 5:1) res1 <- df %>% group_by(g) %>% filter(x <= 4) %>% arrange(time) res2 <- df %>% group_by(g) %>% arrange(time) %>% filter(x <= 4) res3 <- df %>% filter(x <= 4) %>% 
arrange(time) %>% group_by(g) expect_equal(res1, res2) expect_equal(res1, res3) expect_false(is.unsorted(res1$time)) expect_false(is.unsorted(res2$time)) expect_false(is.unsorted(res3$time)) }) test_that("filter() with two conditions does not freeze (#4049)", { expect_identical( iris %>% filter(Sepal.Length > 7, Petal.Length < 6), iris %>% filter(Sepal.Length > 7 & Petal.Length < 6) ) }) test_that("filter() handles matrix and data frame columns (#3630)", { df <- tibble( x = 1:2, y = matrix(1:4, ncol = 2), z = data.frame(A = 1:2, B = 3:4) ) expect_equal(filter(df, x == 1), df[1, ]) expect_equal(filter(df, y[,1] == 1), df[1, ]) expect_equal(filter(df, z$A == 1), df[1, ]) gdf <- group_by(df, x) expect_equal(filter(gdf, x == 1), gdf[1, ]) expect_equal(filter(gdf, y[,1] == 1), gdf[1, ]) expect_equal(filter(gdf, z$A == 1), gdf[1, ]) gdf <- group_by(df, y) expect_equal(filter(gdf, x == 1), gdf[1, ]) expect_equal(filter(gdf, y[,1] == 1), gdf[1, ]) expect_equal(filter(gdf, z$A == 1), gdf[1, ]) gdf <- group_by(df, z) expect_equal(filter(gdf, x == 1), gdf[1, ]) expect_equal(filter(gdf, y[,1] == 1), gdf[1, ]) expect_equal(filter(gdf, z$A == 1), gdf[1, ]) }) test_that("filter() handles named logical (#4638)", { tbl <- tibble(a = c(a = TRUE)) expect_equal(filter(tbl, a), tbl) }) test_that("filter() allows named constants that resolve to logical vectors (#4612)", { filters <- mtcars %>% transmute( cyl %in% 6:8, hp / drat > 50 ) expect_identical( mtcars %>% filter(!!!filters), mtcars %>% filter(!!!unname(filters)) ) }) test_that("filter() allows 1 dimension arrays", { df <- tibble(x = array(c(TRUE, FALSE, TRUE))) expect_identical(filter(df, x), df[c(1, 3),]) }) test_that("filter() allows matrices with 1 column with a deprecation warning (#6091)", { df <- tibble(x = 1:2) expect_snapshot({ out <- filter(df, matrix(c(TRUE, FALSE), nrow = 2)) }) expect_identical(out, tibble(x = 1L)) # Only warns once when grouped df <- tibble(x = c(1, 1, 2, 2)) gdf <- group_by(df, x) expect_snapshot({ 
out <- filter(gdf, matrix(c(TRUE, FALSE), nrow = 2)) }) expect_identical(out, group_by(tibble(x = c(1, 2)), x)) }) test_that("filter() disallows matrices with >1 column", { df <- tibble(x = 1:3) expect_snapshot({ (expect_error(filter(df, matrix(TRUE, nrow = 3, ncol = 2)))) }) }) test_that("filter() disallows arrays with >2 dimensions", { df <- tibble(x = 1:3) expect_snapshot({ (expect_error(filter(df, array(TRUE, dim = c(3, 1, 1))))) }) }) test_that("filter() gives useful error messages", { expect_snapshot({ # wrong type (expect_error( iris %>% group_by(Species) %>% filter(1:n()) )) (expect_error( iris %>% filter(1:n()) )) # matrix with > 1 columns (expect_error( filter(data.frame(x = 1:2), matrix(c(TRUE, FALSE, TRUE, FALSE), nrow = 2)) )) # wrong size (expect_error( iris %>% group_by(Species) %>% filter(c(TRUE, FALSE)) )) (expect_error( iris %>% rowwise(Species) %>% filter(c(TRUE, FALSE)) )) (expect_error( iris %>% filter(c(TRUE, FALSE)) )) # wrong size in column (expect_error( iris %>% group_by(Species) %>% filter(data.frame(c(TRUE, FALSE))) )) (expect_error( iris %>% rowwise() %>% filter(data.frame(c(TRUE, FALSE))) )) (expect_error( iris %>% filter(data.frame(c(TRUE, FALSE))) )) (expect_error( tibble(x = 1) %>% filter(c(TRUE, TRUE)) )) # wrong type in column (expect_error( iris %>% group_by(Species) %>% filter(data.frame(Sepal.Length > 3, 1:n())) )) (expect_error( iris %>% filter(data.frame(Sepal.Length > 3, 1:n())) )) # evaluation error (expect_error( mtcars %>% filter(`_x`) )) (expect_error( mtcars %>% group_by(cyl) %>% filter(`_x`) )) # named inputs (expect_error( filter(mtcars, x = 1) )) (expect_error( filter(mtcars, y > 2, z = 3) )) (expect_error( filter(mtcars, TRUE, x = 1) )) # ts (expect_error( filter(ts(1:10)) )) # Error that contains { (expect_error( tibble() %>% filter(stop("{")) )) # across() in filter() does not warn yet data.frame(x = 1, y = 1) %>% filter(across(everything(), ~ .x > 0)) data.frame(x = 1, y = 1) %>% filter(data.frame(x > 0, y > 0)) 
}) }) test_that("filter preserves grouping", { gf <- group_by(tibble(g = c(1, 1, 1, 2, 2), x = 1:5), g) i <- count_regroups(out <- filter(gf, x %in% c(3,4))) expect_equal(i, 0L) expect_equal(group_vars(gf), "g") expect_equal(group_rows(out), list_of(1L, 2L)) i <- count_regroups(out <- filter(gf, x < 3)) expect_equal(i, 0L) expect_equal(group_vars(gf), "g") expect_equal(group_rows(out), list_of(c(1L, 2L))) }) test_that("filter() with empty dots still calls dplyr_row_slice()", { tbl <- new_tibble(list(x = 1), nrow = 1L) foo <- structure(tbl, class = c("foo_df", class(tbl))) local_methods( # `foo_df` always loses class when row slicing dplyr_row_slice.foo_df = function(data, i, ...) { out <- NextMethod() new_tibble(out, nrow = nrow(out)) } ) expect_s3_class(filter(foo), class(tbl), exact = TRUE) expect_s3_class(filter(foo, x == 1), class(tbl), exact = TRUE) }) test_that("can filter() with unruly class", { local_methods( `[.dplyr_foobar` = function(x, i, ...) new_dispatched_quux(vec_slice(x, i)), dplyr_row_slice.dplyr_foobar = function(x, i, ...) x[i, ] ) df <- foobar(data.frame(x = 1:3)) expect_identical( filter(df, x <= 2), quux(data.frame(x = 1:2, dispatched = TRUE)) ) }) test_that("filter() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) stack <<- sys.calls(), filter(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("if_any() and if_all() work", { df <- tibble(x1 = 1:10, x2 = c(1:5, 10:6)) expect_equal( filter(df, if_all(starts_with("x"), ~ . > 6)), filter(df, x1 > 6 & x2 > 6) ) expect_equal( filter(df, if_any(starts_with("x"), ~ . 
> 6)), filter(df, x1 > 6 | x2 > 6) ) }) test_that("filter keeps zero length groups", { df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) expect_equal(group_size(filter(df, f == 1)), c(2, 0, 0) ) }) test_that("filtering retains labels for zero length groups", { df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) expect_equal( ungroup(count(filter(df, f == 1))), tibble( e = 1, f = factor(1:3), g = c(1, 2, NA), n = c(2L, 0L, 0L) ) ) }) test_that("`filter()` doesn't allow data frames with missing or empty names (#6758)", { df1 <- new_data_frame(set_names(list(1), "")) df2 <- new_data_frame(set_names(list(1), NA_character_)) expect_snapshot(error = TRUE, { filter(df1) }) expect_snapshot(error = TRUE, { filter(df2) }) }) # .by ------------------------------------------------------------------------- test_that("can group transiently using `.by`", { df <- tibble(g = c(1, 1, 2, 1, 2), x = c(5, 10, 1, 2, 3)) out <- filter(df, x > mean(x), .by = g) expect_identical(out$g, c(1, 2)) expect_identical(out$x, c(10, 3)) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping retains bare data.frame class", { df <- tibble(g = c(1, 1, 2, 1, 2), x = c(5, 10, 1, 2, 3)) out <- filter(df, x > mean(x), .by = g) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping retains data frame attributes", { # With data.frames or tibbles df <- data.frame(g = c(1, 1, 2), x = c(1, 2, 1)) tbl <- as_tibble(df) attr(df, "foo") <- "bar" attr(tbl, "foo") <- "bar" out <- filter(df, x > mean(x), .by = g) expect_identical(attr(out, "foo"), "bar") out <- filter(tbl, x > mean(x), .by = g) expect_identical(attr(out, "foo"), "bar") }) test_that("can't use `.by` with `.preserve`", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { filter(df, .by = x, .preserve = TRUE) }) }) 
# --- test-filter.R: error handling for the `.by` argument ---

test_that("catches `.by` with grouped-df", {
  df <- tibble(x = 1)
  gdf <- group_by(df, x)
  # `.by` provides transient grouping only; supplying it on an already
  # grouped data frame must error (snapshot pins the exact message).
  expect_snapshot(error = TRUE, {
    filter(gdf, .by = x)
  })
})

test_that("catches `.by` with rowwise-df", {
  df <- tibble(x = 1)
  rdf <- rowwise(df)
  # Same restriction for rowwise data frames.
  expect_snapshot(error = TRUE, {
    filter(rdf, .by = x)
  })
})

test_that("catches `by` typo (#6647)", {
  df <- tibble(x = 1)
  # A bare `by =` (missing the leading dot) would otherwise be swallowed
  # as a named filter expression; the snapshot pins the error raised instead.
  expect_snapshot(error = TRUE, {
    filter(df, by = x)
  })
})

dplyr/tests/testthat/test-funs.R0000644000176200001440000000703414366556340016501 0ustar liggesusers
# between -------------------------------------------------------------------

test_that("returns NA if any argument is NA", {
  na <- NA_real_
  # Missingness in any of x/left/right propagates to the result.
  expect_equal(between(1, 1, na), NA)
  expect_equal(between(1, na, 1), NA)
  expect_equal(between(na, 1, 1), NA)
})

test_that("can be vectorized along `left` and `right`", {
  # Bounds need not be scalars; they are matched elementwise with `x`.
  expect_identical(between(1:2, c(0L, 4L), 5L), c(TRUE, FALSE))
  expect_identical(between(1:2, 0L, c(0L, 3L)), c(FALSE, TRUE))
})

test_that("compatible with base R", {
  x <- runif(1e3)
  # Both bounds are inclusive, matching `x >= left & x <= right`.
  expect_equal(between(x, 0.25, 0.5), x >= 0.25 & x <= 0.5)
})

test_that("works with S3 objects", {
  x <- new_vctr(c(1, 5), class = "foo")
  left <- new_vctr(0, class = "foo")
  right <- new_vctr(3, class = "foo")
  expect_identical(between(x, left, right), c(TRUE, FALSE))
})

test_that("works with date-time `x` and date `left/right` (#6183)", {
  jan2 <- as.POSIXct("2022-01-02", tz = "UTC")
  jan1 <- as.Date("2022-01-01")
  jan3 <- as.Date("2022-01-03")
  # Mixed Date/POSIXct inputs are accepted.
  expect_true(between(jan2, jan1, jan3))
})

test_that("works with data frames", {
  x <- tibble(year = c(2020, 2020, 2021), month = c(1, 3, 6))
  left <- tibble(year = c(2019, 2020, 2021), month = c(1, 4, 3))
  right <- tibble(year = c(2020, 2020, 2022), month = c(1, 6, 3))
  # Data-frame inputs compare row-wise across all columns.
  expect_identical(between(x, left, right), c(TRUE, FALSE, TRUE))
})

test_that("works with rcrds", {
  x <- new_rcrd(list(year = c(2020, 2020, 2021), month = c(1, 3, 6)))
  left <- new_rcrd(list(year = c(2019, 2020, 2021), month = c(1, 4, 3)))
  right <- new_rcrd(list(year = c(2020, 2020, 2022), month = c(1, 6, 3)))
  expect_identical(between(x, left, right), c(TRUE, FALSE, TRUE))
})

test_that("takes the common type between all inputs (#6478)", {
  # Inputs are cast to their common type before comparison, so an
  # integer `x` against double bounds compares exactly.
  expect_identical(between(1L, 1.5, 2L), FALSE)
  expect_identical(between(1L, 0.5, 2.5), TRUE)
  # Incompatible types error; snapshots pin the messages.
  expect_snapshot(error = TRUE, {
    between("1", 2, 3)
  })
  expect_snapshot(error = TRUE, {
    between(1, "2", 3)
  })
  expect_snapshot(error = TRUE, {
    between(1, 2, "3")
  })
})

test_that("recycles `left` and `right` to the size of `x`", {
  # Bounds of incompatible length (neither 1 nor length(x)) must error.
  expect_snapshot(error = TRUE, {
    between(1:3, 1:2, 1L)
  })
  expect_snapshot(error = TRUE, {
    between(1:3, 1L, 1:2)
  })
})

# cum* --------------------------------------------------------------------

test_that("cum(sum,min,max) return expected results for simple cases", {
  # Empty input yields an empty result.
  expect_equal(cummean(numeric()), numeric())
  x <- c(5, 10, 2, 4)
  # cummean is the running sum divided by the running count.
  expect_equal(cummean(x), cumsum(x) / seq_along(x))

  # cumany: TRUE from the first TRUE onwards.
  expect_equal(cumany(logical()), logical())
  expect_equal(cumany(FALSE), FALSE)
  expect_equal(cumany(TRUE), TRUE)
  expect_equal(cumany(c(FALSE, FALSE)), c(FALSE, FALSE))
  expect_equal(cumany(c(TRUE, FALSE)), c(TRUE, TRUE))
  expect_equal(cumany(c(FALSE, TRUE)), c(FALSE, TRUE))
  expect_equal(cumany(c(TRUE, TRUE)), c(TRUE, TRUE))

  # cumall: FALSE from the first FALSE onwards.
  expect_equal(cumall(logical()), logical())
  expect_equal(cumall(FALSE), FALSE)
  expect_equal(cumall(TRUE), TRUE)
  expect_equal(cumall(c(FALSE, FALSE)), c(FALSE, FALSE))
  expect_equal(cumall(c(TRUE, FALSE)), c(TRUE, FALSE))
  expect_equal(cumall(c(FALSE, TRUE)), c(FALSE, FALSE))
  expect_equal(cumall(c(TRUE, TRUE)), c(TRUE, TRUE))
})

test_that("cumany/cumall propagate NAs (#408, #3749, #4132)", {
  # An NA leaves the running result undetermined until a value that
  # decides it (TRUE for cumany, FALSE for cumall) is seen.
  expect_equal(cumall(c(NA, NA)), c(NA, NA))
  expect_equal(cumall(c(NA, TRUE)), c(NA, NA))
  expect_equal(cumall(c(NA, FALSE)), c(NA, FALSE))

  expect_equal(cumany(c(NA, NA)), c(NA, NA))
  expect_equal(cumany(c(NA, TRUE)), c(NA, TRUE))
  expect_equal(cumany(c(NA, FALSE)), c(NA, NA))
})

test_that("cummean is not confused by FP error (#1387)", {
  # A constant input must give an exactly constant running mean,
  # i.e. no accumulated floating-point drift.
  a <- rep(99, 9)
  expect_true(all(cummean(a) == a))
})
dplyr/tests/testthat/test-all-equal.R0000644000176200001440000001171614366556340017405 0ustar liggesuserstest_that("all_equal is deprecated", { expect_snapshot(all_equal(mtcars, mtcars)) }) # A data frame with all major types df_all <- data.frame( a = c(1, 2.5), b = 1:2, c = c(T, F), d = c("a", "b"), e = factor(c("a", "b")), f = Sys.Date() + 1:2, g = Sys.time() + 1:2, stringsAsFactors = FALSE ) test_that("data frames equal to themselves", { local_options(lifecycle_verbosity = "quiet") expect_true(all_equal(mtcars, mtcars)) expect_true(all_equal(iris, iris)) expect_true(all_equal(df_all, df_all)) }) test_that("data frames not equal if missing row", { local_options(lifecycle_verbosity = "quiet") expect_snapshot({ all_equal(mtcars, mtcars[-1, ]) all_equal(iris, iris[-1, ]) all_equal(df_all, df_all[-1, ]) }) }) test_that("data frames not equal if missing col", { local_options(lifecycle_verbosity = "quiet") expect_snapshot({ all_equal(mtcars, mtcars[, -1]) all_equal(iris, iris[, -1]) all_equal(df_all, df_all[, -1]) }) }) test_that("factors equal only if levels equal", { local_options(lifecycle_verbosity = "quiet") df1 <- tibble(x = factor(c("a", "b"))) df2 <- tibble(x = factor(c("a", "d"))) expect_snapshot({ all_equal(df1, df2) all_equal(df2, df1) }) }) test_that("factor comparison requires strict equality of levels (#2440)", { local_options(lifecycle_verbosity = "quiet") df1 <- tibble(x = factor("a")) df2 <- tibble(x = factor("a", levels = c("a", "b"))) expect_true(all_equal(df1, df2, convert = TRUE)) expect_true(all_equal(df2, df1, convert = TRUE)) expect_snapshot({ all_equal(df1, df2) all_equal(df2, df1) }) }) test_that("all.equal.data.frame handles data.frames with NULL names", { local_options(lifecycle_verbosity = "quiet") x <- data.frame(LETTERS[1:3], rnorm(3)) names(x) <- NULL suppressMessages( expect_true(all_equal(x, x)) ) }) test_that("data frame equality test with ignore_row_order=TRUE detects difference in number of rows. 
#1065", { local_options(lifecycle_verbosity = "quiet") DF1 <- tibble(a = 1:4, b = letters[1:4]) DF2 <- tibble(a = c(1:4, 4L), b = letters[c(1:4, 4L)]) expect_false(isTRUE(all_equal(DF1, DF2, ignore_row_order = TRUE))) DF1 <- tibble(a = c(1:4, 2L), b = letters[c(1:4, 2L)]) DF2 <- tibble(a = c(1:4, 4L), b = letters[c(1:4, 4L)]) expect_false(isTRUE(all_equal(DF1, DF2, ignore_row_order = TRUE))) }) test_that("all.equal handles NA_character_ correctly. #1095", { local_options(lifecycle_verbosity = "quiet") d1 <- tibble(x = c(NA_character_)) expect_true(all_equal(d1, d1)) d2 <- tibble(x = c(NA_character_, "foo", "bar")) expect_true(all_equal(d2, d2)) }) test_that("handle Date columns of different types, integer and numeric (#1204)", { local_options(lifecycle_verbosity = "quiet") a <- data.frame(date = as.Date("2015-06-07")) b <- data.frame(date = structure(as.integer(a$date), class = "Date")) expect_true(all_equal(a, b)) }) test_that("equality test fails when convert is FALSE and types don't match (#1484)", { local_options(lifecycle_verbosity = "quiet") df1 <- tibble(x = "a") df2 <- tibble(x = factor("a")) expect_true(all_equal(df1, df2, convert = TRUE)) expect_snapshot({ all_equal(df1, df2, convert = FALSE) }) }) test_that("equality handles data frames with 0 rows (#1506)", { df0 <- tibble(x = numeric(0), y = character(0)) expect_equal(df0, df0) }) test_that("equality handles data frames with 0 columns (#1506)", { df0 <- tibble(a = 1:10)[-1] expect_equal(df0, df0) }) test_that("equality handle raw columns", { local_options(lifecycle_verbosity = "quiet") df <- tibble(a = 1:3, b = as.raw(1:3)) expect_true(all_equal(df, df)) }) test_that("equality returns a message for convert = TRUE", { local_options(lifecycle_verbosity = "quiet") df1 <- tibble(x = 1:3) df2 <- tibble(x = as.character(1:3)) expect_snapshot({ all_equal(df1, df2) all_equal(df1, df2, convert = TRUE) }) }) test_that("numeric and integer can be compared if convert = TRUE", { local_options(lifecycle_verbosity = 
"quiet") df1 <- tibble(x = 1:3) df2 <- tibble(x = as.numeric(1:3)) expect_true(all_equal(df1, df2, convert = TRUE)) expect_snapshot({ all_equal(df1, df2) }) }) test_that("returns vector for more than one difference (#1819)", { local_options(lifecycle_verbosity = "quiet") expect_snapshot({ all_equal(tibble(a = 1, b = 2), tibble(a = 1L, b = 2L)) }) }) test_that("ignore column order", { local_options(lifecycle_verbosity = "quiet") expect_snapshot({ all_equal(tibble(a = 1, b = 2), tibble(b = 2, a = 1), ignore_col_order = FALSE) all_equal(tibble(a = 1, b = 2), tibble(a = 1), ignore_col_order = FALSE) }) }) # Errors ------------------------------------------------------------------ test_that("count() give meaningful errors", { local_options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(union(tibble(a = 1), tibble(a = "1")))) (expect_error(union(tibble(a = 1, b = 2), tibble(a = "1", b = "2")))) }) }) dplyr/tests/testthat/test-recode.R0000644000176200001440000001236514366556340016772 0ustar liggesuserstest_that("positional substitution works", { expect_equal(recode(1:2, "a", "b"), c("a", "b")) }) test_that("names override positions", { expect_equal(recode(1:2, `2` = "b", `1` = "a"), c("a", "b")) }) test_that("named substitution works", { x1 <- letters[1:3] x2 <- factor(x1) expect_equal(recode(x1, a = "apple", .default = NA_character_), c("apple", NA, NA)) expect_equal(recode(x2, a = "apple", .default = NA_character_), factor(c("apple", NA, NA))) }) test_that("missing values replaced by missing argument", { expect_equal(recode(c(1, NA), "a"), c("a", NA)) expect_equal(recode(c(1, NA), "a", .missing = "b"), c("a", "b")) expect_equal(recode(c(letters[1:3], NA), .missing = "A"), c("a", "b", "c", "A")) }) test_that("unmatched value replaced by default argument", { expect_warning(expect_equal(recode(c(1, 2), "a"), c("a", NA))) expect_equal(recode(c(1, 2), "a", .default = "b"), c("a", "b")) expect_equal(recode(letters[1:3], .default = "A"), c("A", "A", "A")) }) 
test_that("missing and default place nicely together", { expect_equal( recode(c(1, 2, NA), "a", .default = "b", .missing = "c"), c("a", "b", "c") ) }) test_that("can give name x", { expect_equal(recode("x", x = "a"), "a") }) test_that(".default works when not all values are named", { x <- rep(1:3, 3) expect_equal(recode(x, `3` = 10L, .default = x), rep(c(1L, 2L, 10L), 3)) }) test_that(".default is aliased to .x when missing and compatible", { x <- letters[1:3] expect_equal(recode(x, a = "A"), c("A", "b", "c")) n <- 1:3 expect_equal(recode(n, `1` = 10L), c(10L, 2L, 3L)) }) test_that(".default is not aliased to .x when missing and not compatible", { x <- letters[1:3] expect_warning(expect_equal(recode(x, a = 1), c(1L, NA, NA))) n <- 1:3 expect_warning(expect_equal(recode(n, `1` = "a"), c("a", NA, NA))) }) test_that("conversion of unreplaced values to NA gives warning", { expect_warning(recode(1:3, `1` = "a"), "treated as NA") expect_warning(recode_factor(letters[1:3], b = 1, c = 2)) }) test_that(".dot argument works correctly (PR #2110)", { x1 <- letters[1:3] x2 <- 1:3 x3 <- factor(x1) expect_equal( recode(x1, a = "apple", b = "banana", .default = NA_character_), recode(x1, .default = NA_character_, !!!list(a = "apple", b = "banana")) ) expect_equal( recode(x1, a = "apple", b = "banana", .default = NA_character_), recode(x1, a = "apple", .default = NA_character_, !!!list(b = "banana")) ) expect_equal( recode(x2, "1" = 4, "2" = 5, .default = NA_real_), recode(x2, .default = NA_real_, !!!list("1" = 4, "2" = 5)) ) expect_equal( recode(x2, "1" = 4, "2" = 5, .default = NA_real_), recode(x2, "1" = 4, .default = NA_real_, !!!list("2" = 5)) ) expect_equal( recode_factor(x3, a = "apple", b = "banana", .default = NA_character_), recode_factor(x3, .default = NA_character_, !!!list(a = "apple", b = "banana")) ) }) test_that("`recode()` signals that it is superseded", { expect_snapshot(catch_cnd(recode("a", a = "A"))) }) # factor 
------------------------------------------------------------------ test_that("default .default works with factors", { expect_equal(recode(factor(letters[1:3]), a = "A"), factor(c("A", "b", "c"))) }) test_that("can recode factor to double", { f <- factor(letters[1:3]) expect_equal(recode(f, a = 1, b = 2, c = 3), c(1, 2, 3)) expect_equal(recode(f, a = 1, b = 2), c(1, 2, NA)) expect_equal(recode(f, a = 1, b = 2, .default = 99), c(1, 2, 99)) }) test_that("recode_factor() handles .missing and .default levels", { x <- c(1:3, NA) expect_warning( expect_equal( recode_factor(x, `1` = "z", `2` = "y"), factor(c("z", "y", NA, NA), levels = c("z", "y")) ) ) expect_equal( recode_factor(x, `1` = "z", `2` = "y", .default = "D"), factor(c("z", "y", "D", NA), levels = c("z", "y", "D")) ) expect_equal( recode_factor(x, `1` = "z", `2` = "y", .default = "D", .missing = "M"), factor(c("z", "y", "D", "M"), c("z", "y", "D", "M")) ) }) test_that("recode_factor() handles vector .default", { expected <- factor(c("a", "z", "y"), levels = c("z", "y", "a")) x1 <- letters[1:3] x2 <- factor(x1) expect_equal(recode_factor(x1, b = "z", c = "y"), expected) expect_equal(recode_factor(x2, b = "z", c = "y"), expected) expect_equal(recode_factor(x1, b = "z", c = "y", .default = x1), expected) expect_equal(recode_factor(x2, b = "z", c = "y", .default = x1), expected) }) test_that("can recode factor with redundant levels", { expect_equal( recode(factor(letters[1:4]), d = "c", b = "a"), factor(c("a", "a", "c", "c"), levels = c("a", "c")) ) expect_equal( recode_factor(letters[1:4], d = "c", b = "a"), factor(c("a", "a", "c", "c"), levels = c("c", "a")) ) }) test_that("`recode_factor()` signals that it is superseded", { expect_snapshot(catch_cnd(recode_factor("a", a = "A"))) }) # Errors -------------------------------------------- test_that("recode() gives meaningful error messages", { expect_snapshot({ (expect_error(recode(factor("a"), a = 5, .missing = 10))) (expect_error(recode("a", b = 5, "c"))) 
(expect_error(recode(factor("a"), b = 5, "c"))) # no replacement (expect_error(recode(1:5))) (expect_error(recode("a"))) (expect_error(recode(factor("a")))) }) }) dplyr/tests/testthat/test-group-data.R0000644000176200001440000000666614366556340017603 0ustar liggesusers # group_data -------------------------------------------------------------- test_that("group_data() returns a data frame", { df <- data.frame(x = 1:3) gd <- group_data(df) expect_s3_class(gd, "data.frame", exact = TRUE) expect_equal(gd$.rows, list_of(1:3)) }) test_that("group_data() returns a tibble", { df <- tibble(x = 1:3) gd <- group_data(df) expect_s3_class(gd, "tbl_df") expect_equal(gd, tibble(".rows" := list_of(1:3))) }) test_that("group_data() returns a tibble", { df <- tibble(x = c(1, 1, 2)) gf <- group_by(df, x) gd <- group_data(gf) expect_s3_class(gd, "tbl_df") expect_equal( gd, tibble(x = c(1, 2), ".rows" := list_of(1:2, 3L)), ignore_attr = TRUE ) }) test_that("group_data(% group_keys(x), "deprecated") expect_equal(out, tibble(x = 1)) }) # group_indices() --------------------------------------------------------- test_that("no arg group_indices() is deprecated", { df <- tibble(x = 1) expect_warning(out <- summarise(df, id = group_indices()), "deprecated") expect_equal(out, tibble(id = 1)) }) test_that("group_indices(...) is deprecated", { rlang::local_options(lifecycle_verbosity = "error") df <- tibble(x = 1, y = 2) expect_error(df %>% group_indices(x), "deprecated") }) test_that("group_indices(...) 
still works though", { rlang::local_options(lifecycle_verbosity = "quiet") df <- tibble(x = 1, y = 2) out <- df %>% group_indices(x) expect_equal(out, 1) }) test_that("group_indices() returns expected values", { df <- tibble(x = c("b", "a", "b")) gf <- group_by(df, x) expect_equal(group_indices(df), c(1, 1, 1)) expect_equal(group_indices(gf), c(2, 1, 2)) }) test_that("group_indices() handles 0 rows data frames (#5541)", { df <- new_grouped_df( data.frame(x = integer(), y = integer()), groups = data.frame(x=0, .rows = vctrs::list_of(1:1000)) ) expect_equal(group_indices(df), integer()) }) # group_size -------------------------------------------------------------- test_that("ungrouped data has 1 group, with group size = nrow()", { df <- tibble(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) expect_equal(n_groups(df), 1L) expect_equal(group_size(df), 30) }) test_that("rowwise data has one group for each group", { rw <- rowwise(mtcars) expect_equal(n_groups(rw), 32) expect_equal(group_size(rw), rep(1, 32)) }) test_that("group_size correct for grouped data", { df <- tibble(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) %>% group_by(x) expect_equal(n_groups(df), 3L) expect_equal(group_size(df), rep(10, 3)) }) # n_groups ---------------------------------------------------------------- test_that("n_groups respects zero-length groups (#341)", { df <- tibble(x = factor(1:3, levels = 1:4)) %>% group_by(x, .drop = FALSE) expect_equal(n_groups(df), 4) }) dplyr/tests/testthat/test-select-helpers.R0000644000176200001440000000107113663216626020437 0ustar liggesuserstest_that("group_cols() selects grouping variables", { df <- tibble(x = 1:3, y = 1:3) gf <- group_by(df, x) expect_equal(df %>% select(group_cols()), df[integer()]) expect_message( expect_equal(gf %>% select(group_cols()), gf["x"]), NA ) }) test_that("group_cols(vars) is deprecated", { expect_warning(out <- group_cols("a"), "deprecated") expect_equal(out, integer()) }) test_that("group_cols() finds groups in scoped 
helpers", { gf <- group_by(tibble(x = 1, y = 2), x) out <- select_at(gf, vars(group_cols())) expect_named(out, "x") }) dplyr/tests/testthat/test-colwise-mutate.R0000644000176200001440000002647514366556340020502 0ustar liggesuserstest_that("can use character vectors or bare functions", { df <- data.frame(x = 1:3) expect_equal(summarise_all(df, "mean"), data.frame(x = 2)) expect_equal(summarise_all(df, mean), data.frame(x = 2)) expect_equal(mutate_all(df, list(x = "mean")), data.frame(x = rep(2, 3))) expect_equal(mutate_all(df, list(x = mean)), data.frame(x = rep(2, 3))) }) test_that("default names are smallest unique set", { df <- data.frame(x = 1:3, y = 1:3) expect_named(summarise_at(df, vars(x:y), list(mean)), c("x", "y")) expect_named(summarise_at(df, vars(x), list(mean = mean, sd = sd)), c("mean", "sd")) expect_named(summarise_at(df, vars(x:y), list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) }) test_that("named arguments force complete names", { df <- data.frame(x = 1:3, y = 1:3) expect_named(summarise_at(df, vars(x:y), list(mean = mean)), c("x_mean", "y_mean")) expect_named(summarise_at(df, vars(x = x), list(mean = mean, sd = sd)), c("x_mean", "x_sd")) }) expect_classes <- function(tbl, expected) { classes <- unname(map_chr(tbl, class)) classes <- paste0(substring(classes, 0, 1), collapse = "") expect_equal(classes, expected) } test_that("can select colwise", { columns <- iris %>% mutate_at(NULL, as.character) expect_classes(columns, "nnnnf") columns <- iris %>% mutate_at(vars(starts_with("Petal")), as.character) expect_classes(columns, "nnccf") numeric <- iris %>% mutate_at(c(1, 3), as.character) expect_classes(numeric, "cncnf") character <- iris %>% mutate_at("Species", as.character) expect_classes(character, "nnnnc") }) test_that("can probe colwise", { predicate <- iris %>% mutate_if(is.factor, as.character) expect_classes(predicate, "nnnnc") logical <- iris %>% mutate_if(c(TRUE, FALSE, TRUE, TRUE, FALSE), as.character) 
expect_classes(logical, "cnccf") }) test_that("non syntactic colnames work", { df <- tibble(`x 1` = 1:3) expect_identical(summarise_at(df, "x 1", sum)[[1]], 6L) expect_identical(summarise_if(df, is.numeric, sum)[[1]], 6L) expect_identical(summarise_all(df, sum)[[1]], 6L) expect_identical(mutate_all(df, `*`, 2)[[1]], (1:3) * 2) }) test_that("empty selection does not select everything (#2009, #1989)", { expect_equal( tibble::remove_rownames(mtcars), tibble::remove_rownames(mutate_if(mtcars, is.factor, as.character)) ) }) test_that("predicate can be quoted", { expected <- mutate_if(mtcars, is_integerish, mean) expect_identical(mutate_if(mtcars, "is_integerish", mean), expected) expect_identical(mutate_if(mtcars, ~ is_integerish(.x), mean), expected) }) test_that("transmute verbs do not retain original variables", { expect_named(transmute_all(tibble(x = 1:3, y = 1:3), list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) expect_named(transmute_if(tibble(x = 1:3, y = 1:3), is_integer, list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) expect_named(transmute_at(tibble(x = 1:3, y = 1:3), vars(x:y), list(mean = mean, sd = sd)), c("x_mean", "y_mean", "x_sd", "y_sd")) }) test_that("can rename with vars() (#2594)", { expect_identical( mutate_at(tibble(x = 1:3), vars(y = x), mean), tibble(x = 1:3, y = c(2, 2, 2)) ) }) test_that("selection works with grouped data frames (#2624)", { gdf <- group_by(iris, Species) expect_snapshot(out <- mutate_if(gdf, is.factor, as.character)) expect_identical(out, gdf) }) test_that("at selection works even if not all ops are named (#2634)", { df <- tibble(x = 1, y = 2) expect_identical(mutate_at(df, vars(z = x, y), list(~. 
+ 1)), tibble(x = 1, y = 3, z = 2)) }) test_that("can use a purrr-style lambda", { expect_identical(summarise_at(mtcars, vars(1:2), ~ mean(.x)), summarise(mtcars, mpg = mean(mpg), cyl = mean(cyl))) }) test_that("mutate and transmute variants does not mutate grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) res <- mutate(tbl, gr2 = sqrt(gr2), x = sqrt(x)) expect_message(expect_identical(mutate_all(tbl, sqrt), res), "ignored") expect_message(expect_identical(transmute_all(tbl, sqrt), res), "ignored") expect_message(expect_identical(mutate_if(tbl, is.integer, sqrt), res), "ignored") expect_message(expect_identical(transmute_if(tbl, is.integer, sqrt), res), "ignored") expect_identical(transmute_at(tbl, vars(-group_cols()), sqrt), res) expect_identical(mutate_at(tbl, vars(-group_cols()), sqrt), res) }) test_that("summarise variants does not summarise grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) res <- summarise(tbl, gr2 = mean(gr2), x = mean(x)) expect_identical(summarise_all(tbl, mean), res) expect_identical(summarise_if(tbl, is.integer, mean), res) }) test_that("summarise_at removes grouping variables (#3613)", { d <- tibble( x = 1:2, y = 3:4, g = 1:2) %>% group_by(g) res <- d %>% group_by(g) %>% summarise_at(-1, mean) expect_equal(names(res), c("g", "y")) }) test_that("group_by_(at,all) handle utf-8 names (#3829)", { local_non_utf8_encoding() name <- get_native_lang_string() tbl <- tibble(a = 1) %>% setNames(name) res <- group_by_all(tbl) %>% groups() expect_equal(res[[1]], sym(name)) res <- group_by_at(tbl, name) %>% groups() expect_equal(res[[1]], sym(name)) }) test_that("*_(all,at) handle utf-8 names (#2967)", { local_non_utf8_encoding() name <- get_native_lang_string() tbl <- tibble(a = 1) %>% setNames(name) res <- tbl %>% mutate_all(list(as.character)) %>% names() expect_equal(res, name) res <- tbl %>% 
mutate_at(name, list(as.character)) %>% names() expect_equal(res, name) res <- tbl %>% summarise_all(list(as.character)) %>% names() expect_equal(res, name) res <- tbl %>% summarise_at(name, list(as.character)) %>% names() expect_equal(res, name) res <- select_at(tbl, name) %>% names() expect_equal(res, name) }) test_that("summarise_at with multiple columns AND unnamed functions works (#4119)", { res <- storms %>% summarise_at(vars(wind, pressure), list(mean, median)) expect_equal(ncol(res), 4L) expect_equal(names(res), c("wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2")) res <- storms %>% summarise_at(vars(wind, pressure), list(n = length, mean, median)) expect_equal(ncol(res), 6L) expect_equal(names(res), c("wind_n", "pressure_n", "wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2")) }) test_that("mutate_at with multiple columns AND unnamed functions works (#4119)", { res <- storms %>% mutate_at(vars(wind, pressure), list(mean, median)) expect_equal(ncol(res), ncol(storms) + 4L) expect_equal( names(res), c(names(storms), c("wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2")) ) }) test_that("colwise mutate have .data in scope of rlang lambdas (#4183)", { results <- list( iris %>% mutate_if(is.numeric, ~ . / iris$Petal.Width), iris %>% mutate_if(is.numeric, ~ . / Petal.Width), iris %>% mutate_if(is.numeric, ~ . / .data$Petal.Width), iris %>% mutate_if(is.numeric, list(~ . / iris$Petal.Width )), iris %>% mutate_if(is.numeric, list(~ . / Petal.Width )), iris %>% mutate_if(is.numeric, list(~ . 
/ .data$Petal.Width)), iris %>% mutate_if(is.numeric, ~ .x / iris$Petal.Width), iris %>% mutate_if(is.numeric, ~ .x / Petal.Width), iris %>% mutate_if(is.numeric, ~ .x / .data$Petal.Width), iris %>% mutate_if(is.numeric, list(~ .x / iris$Petal.Width )), iris %>% mutate_if(is.numeric, list(~ .x / Petal.Width )), iris %>% mutate_if(is.numeric, list(~ .x / .data$Petal.Width)) ) for(i in 2:12) { expect_equal(results[[1]], results[[i]]) } }) test_that("can choose the name of vars with multiple funs (#4180)", { expect_identical( mtcars %>% group_by(cyl) %>% summarise_at(vars(DISP = disp), list(mean = mean, median = median)), mtcars %>% group_by(cyl) %>% summarise(DISP_mean = mean(disp), DISP_median = median(disp)) ) }) test_that("summarise_at() unquotes in lambda (#4287)", { df <- tibble::tibble(year = seq(2015, 2050, 5), P = 5.0 + 2.5 * year) year <- 2037 expect_equal( summarise_at(df, vars(-year), ~approx(x = year, y = ., xout = !!year)$y), summarise(df, P = approx(x = year, y = P, xout = !!year)$y) ) }) test_that("mutate_at() unquotes in lambdas (#4199)", { df <- tibble(a = 1:10, b = runif(1:10), c = letters[1:10]) varname <- "a" symname <- rlang::sym(varname) quoname <- enquo(symname) expect_identical( df %>% mutate(b = mean(!!quoname)), df %>% mutate_at(vars(matches("b")), list(~mean(!!quoname))) ) }) test_that("summarise_at() can refer to local variables and columns (#4304)", { # using local here in case someone wants to run the content of the test # as opposed to the test_that() call res <- local({ value <- 10 expect_identical( iris %>% summarise_at("Sepal.Length", ~ sum(. 
/ value)), iris %>% summarise(Sepal.Length = sum(Sepal.Length / value)) ) }) }) test_that("colwise mutate handles formulas with constants (#4374)", { expect_identical( tibble(x = 12) %>% mutate_all(~ 42), tibble(x = 42) ) expect_identical( tibble(x = 12) %>% mutate_at("x", ~ 42), tibble(x = 42) ) }) test_that("colwise mutate handle named chr vectors", { res <- tibble(x = 1:10) %>% mutate_at(c(y = "x"), mean) expect_identical(res, tibble(x = 1:10, y = 5.5)) }) test_that("colwise verbs deprecate quosures (#4330)", { expect_snapshot({ (expect_warning(mutate_at(mtcars, vars(mpg), quo(mean(.))))) (expect_warning(summarise_at(mtcars, vars(mpg), quo(mean(.))))) }) }) test_that("rlang lambda inherit from the data mask (#3843)", { res <- iris %>% mutate_at( vars(starts_with("Petal")), ~ ifelse(Species == "setosa" & . < 1.5, NA, .) ) expected <- iris %>% mutate( Petal.Length = ifelse(Species == "setosa" & Petal.Length < 1.5, NA, Petal.Length), Petal.Width = ifelse(Species == "setosa" & Petal.Width < 1.5, NA, Petal.Width) ) expect_identical(res, expected) res <- iris %>% group_by(Species) %>% mutate_at( vars(starts_with("Petal")), ~ ifelse(Species == "setosa" & . < 1.5, NA, .) 
) expected <- iris %>% group_by(Species) %>% mutate( Petal.Length = ifelse(Species == "setosa" & Petal.Length < 1.5, NA, Petal.Length), Petal.Width = ifelse(Species == "setosa" & Petal.Width < 1.5, NA, Petal.Width) ) expect_identical(res, expected) }) test_that("_if isn't tripped up by columns named 'i' (#5330)", { test_df <- tibble(i = c("a", "b"), j = c(1, 2)) result_df <- test_df %>% mutate_if(is.character, as.factor) expect_equal(result_df$i, as.factor(test_df$i)) expect_equal(result_df$j, test_df$j) }) # Errors -------------------------------------------- test_that("colwise mutate gives meaningful error messages", { expect_snapshot({ # column not found (expect_error( mutate_at(tibble(), "test", ~ 1) )) # not summarising grouping variables tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) tbl <- group_by(tbl, gr1) (expect_error( summarise_at(tbl, vars(gr1), mean) )) # improper additional arguments (expect_error( mutate_all(mtcars, length, 0, 0) )) (expect_error( mutate_all(mtcars, mean, na.rm = TRUE, na.rm = TRUE) )) }) }) dplyr/tests/testthat/test-join-cols.R0000644000176200001440000001656714416000530017413 0ustar liggesuserstest_that("key vars are found", { vars <- join_cols(c("x", "y"), c("x", "z"), by = join_by(x)) expect_equal(vars$x$key, c(x = 1L)) expect_equal(vars$y$key, c(x = 1L)) vars <- join_cols(c("a", "x", "b"), c("x", "a"), by = join_by(x)) expect_equal(vars$x$key, c(x = 2L)) expect_equal(vars$y$key, c(x = 1L)) vars <- join_cols(c("x", "y"), c("a", "x", "z"), by = join_by(y == z)) expect_equal(vars$x$key, c(y = 2L)) expect_equal(vars$y$key, c(z = 3L)) vars <- join_cols(c("x", "y"), c("a", "x", "z"), by = join_by(y >= z)) expect_equal(vars$x$key, c(y = 2L)) expect_equal(vars$y$key, c(z = 3L)) }) test_that("y key matches order and names of x key", { vars <- join_cols(c("x", "y", "z"), c("c", "b", "a"), by = join_by(x == a, y == b)) expect_equal(vars$x$key, c(x = 1L, y = 2L)) expect_equal(vars$y$key, c(a = 3L, b = 2L)) }) 
test_that("duplicate column names are given suffixes", { vars <- join_cols(c("x", "y"), c("x", "y"), by = join_by(x)) expect_equal(vars$x$out, c("x" = 1, "y.x" = 2)) expect_equal(vars$y$out, c("y.y" = 2)) # including join vars when keep = TRUE vars <- join_cols(c("x", "y"), c("x", "y"), by = join_by(x), keep = TRUE) expect_equal(vars$x$out, c("x.x" = 1, "y.x" = 2)) expect_equal(vars$y$out, c("x.y" = 1, "y.y" = 2)) vars <- join_cols(c("x", "y"), c("x", "y"), by = join_by(x < x), keep = TRUE) expect_equal(vars$x$out, c("x.x" = 1, "y.x" = 2)) expect_equal(vars$y$out, c("x.y" = 1, "y.y" = 2)) # suffixes don't create duplicates vars <- join_cols(c("x", "y", "y.x"), c("x", "y"), by = join_by(x)) expect_equal(vars$x$out, c("x" = 1, "y.x" = 2, "y.x.x" = 3)) expect_equal(vars$y$out, c("y.y" = 2)) # but not when they're the join vars vars <- join_cols(c("A", "A.x"), c("B", "A.x", "A"), by = join_by(A.x)) expect_named(vars$x$out, c("A.x.x", "A.x")) expect_named(vars$y$out, c("B", "A.y")) # or when no suffix is requested vars <- join_cols(c("x", "y"), c("x", "y"), by = join_by(x), suffix = c("", ".y")) expect_equal(vars$x$out, c("x" = 1, "y" = 2)) expect_equal(vars$y$out, c("y.y" = 2)) }) test_that("duplicate non-equi key columns are given suffixes", { vars <- join_cols(c("a", "y", "z"), c("b", "y", "z"), by = join_by(y >= y, z <= z)) expect_equal(vars$x$out, c("a" = 1, "y.x" = 2, "z.x" = 3)) expect_equal(vars$y$out, c("b" = 1, "y.y" = 2, "z.y" = 3)) }) test_that("NA names are preserved", { vars <- join_cols(c("x", NA), c("x", "z"), by = join_by(x)) expect_named(vars$x$out, c("x", NA)) vars <- join_cols(c("x", NA), c("x", NA), by = join_by(x)) expect_named(vars$x$out, c("x", "NA.x")) expect_named(vars$y$out, "NA.y") }) test_that("by default, `by` columns omitted from `y` with equi-conditions, but not non-equi conditions" , { # equi keys always keep the LHS name, regardless of whether of not a duplicate exists in the RHS # non-equi keys will get a suffix if a duplicate exists 
vars <- join_cols(c("x", "y", "z"), c("x", "y", "z"), by = join_by(x == y, y > z)) expect_equal(vars$x$out, c("x" = 1, "y" = 2, "z.x" = 3)) expect_equal(vars$y$out, c("x.y" = 1, "z.y" = 3)) # unless specifically requested with `keep = TRUE` vars <- join_cols(c("x", "y", "z"), c("x", "y", "z"), by = join_by(x == y, y > z), keep = TRUE) expect_equal(vars$x$out, c("x.x" = 1, "y.x" = 2, "z.x" = 3)) expect_equal(vars$y$out, c("x.y" = 1, "y.y" = 2, "z.y" = 3)) }) test_that("can't mix non-equi conditions with `keep = FALSE` (#6499)", { expect_snapshot(error = TRUE, { join_cols(c("x", "y"), c("x", "z"), by = join_by(x, y > z), keep = FALSE) }) expect_snapshot(error = TRUE, { join_cols(c("xl", "xu"), c("yl", "yu"), by = join_by(xl >= yl, xu < yu), keep = FALSE) }) # Doesn't make sense here. # With right/full joins we'd have to merge both `yl` and `yu` into `x` somehow. expect_snapshot(error = TRUE, { join_cols("x", c("yl", "yu"), by = join_by(between(x, yl, yu)), keep = FALSE) }) # Doesn't make sense here. # With right/full joins, based on how the binary conditions are generated # we'd merge: # - `yu` into `xl` # - `yl` into `xu` # Which can result in `xl` and `xu` columns that don't maintain a `xl <= xu` # invariant. 
expect_snapshot(error = TRUE, { join_cols(c("xl", "xu"), c("yl", "yu"), by = join_by(overlaps(xl, xu, yl, yu)), keep = FALSE) }) }) test_that("can duplicate key between non-equi conditions", { vars <- join_cols("x", c("xl", "xu"), by = join_by(x > xl, x < xu)) expect_identical(vars$x$key, c(x = 1L, x = 1L)) expect_identical(vars$x$out, c(x = 1L)) expect_identical(vars$y$key, c(xl = 1L, xu = 2L)) expect_identical(vars$y$out, c(xl = 1L, xu = 2L)) expect_identical( join_cols("x", c("xl", "xu"), by = join_by(x > xl, x < xu), keep = NULL), join_cols("x", c("xl", "xu"), by = join_by(x > xl, x < xu), keep = TRUE) ) }) test_that("can't duplicate key between equi condition and non-equi condition", { expect_snapshot(error = TRUE, join_cols("x", c("xl", "xu"), by = join_by(x > xl, x == xu))) expect_snapshot(error = TRUE, join_cols(c("xl", "xu"), "x", by = join_by(xl < x, xu == x))) }) test_that("emits useful messages", { # names expect_snapshot(error = TRUE, join_cols(c("x", "y"), c("y", "y"), join_by(y))) expect_snapshot(error = TRUE, join_cols(c("y", "y"), c("x", "y"), join_by(y))) xy <- c("x", "y") xyz <- c("x", "y", "z") # join vars errors expect_snapshot(error = TRUE, join_cols(xy, xy, by = as_join_by(list("1", y = "2")))) expect_snapshot(error = TRUE, join_cols(xy, xy, by = as_join_by(list(x = "1", "2")))) expect_snapshot(error = TRUE, join_cols(xy, xy, by = as_join_by(c("x", NA)))) expect_snapshot(error = TRUE, join_cols(xy, xy, by = as_join_by(c("aaa", "bbb")))) # join vars uniqueness expect_snapshot(error = TRUE, join_cols(xy, xy, by = as_join_by(c("x", "x", "x")))) expect_snapshot(error = TRUE, join_cols(xyz, xyz, by = join_by(x, x > y, z))) # suffixes expect_snapshot(error = TRUE, join_cols(xy, xy, by = join_by(x), suffix = "x")) expect_snapshot(error = TRUE, join_cols(xy, xy, by = join_by(x), suffix = c("", NA))) }) # ------------------------------------------------------------------------------ # join_cast_common() test_that("takes common type", { x <- tibble(a = 
1, b = 2L) y <- tibble(a = 1L, b = 3) vars <- join_cols(names(x), names(y), by = join_by(a, b)) out <- join_cast_common(x, y, vars) expect_identical(out$x, tibble(a = 1, b = 2)) expect_identical(out$y, tibble(a = 1, b = 3)) }) test_that("finalizes unspecified columns (#6804)", { vars <- join_cols(x_names = "x", y_names = "x", by = join_by(x)) x <- tibble(x = NA) y <- tibble(x = NA) out <- join_cast_common(x, y, vars) expect_identical(out$x, tibble(x = NA)) expect_identical(out$y, tibble(x = NA)) x <- tibble(x = NA) y <- tibble(x = unspecified()) out <- join_cast_common(x, y, vars) expect_identical(out$x, tibble(x = NA)) expect_identical(out$y, tibble(x = logical())) x <- tibble(x = unspecified()) y <- tibble(x = unspecified()) out <- join_cast_common(x, y, vars) expect_identical(out$x, tibble(x = logical())) expect_identical(out$y, tibble(x = logical())) }) test_that("references original column in `y` when there are type errors (#6465)", { x <- tibble(a = 1) y <- tibble(b = "1") x_key <- x y_key <- set_names(y, names(x)) vars <- join_cols(names(x), names(y), by = join_by(a == b)) expect_snapshot({ (expect_error(join_cast_common(x_key, y_key, vars))) }) }) dplyr/tests/testthat/test-vec-case-when.R0000644000176200001440000002372314406402754020150 0ustar liggesuserstest_that("works with data frames", { conditions <- list( c(FALSE, TRUE, FALSE, FALSE), c(TRUE, TRUE, FALSE, FALSE), c(FALSE, TRUE, FALSE, TRUE) ) values <- list( vctrs::data_frame(x = 1, y = 2), vctrs::data_frame(x = 3, y = 4), vctrs::data_frame(x = 3:6, y = 4:7) ) out <- vec_case_when(conditions, values) expect_identical( out, vctrs::data_frame( x = c(3, 1, NA, 6), y = c(4, 2, NA, 7) ) ) }) test_that("first `TRUE` case wins", { conditions <- list( c(TRUE, FALSE), c(TRUE, TRUE), c(TRUE, TRUE) ) values <- list( 1, 2, 3 ) expect_identical( vec_case_when(conditions, values), c(1, 2) ) }) test_that("can replace missing values by catching with `is.na()`", { x <- c(1:3, NA) conditions <- list( x <= 1, x <= 2, 
is.na(x) ) values <- list( 1, 2, 0 ) expect_identical( vec_case_when(conditions, values), c(1, 2, NA, 0) ) }) test_that("Unused logical `NA` can still be cast to `values` ptype", { # Requires that casting happen before recycling, because it recycles # to size zero, resulting in a logical rather than an unspecified. expect_identical(vec_case_when(list(TRUE, FALSE), list("x", NA)), "x") expect_identical(vec_case_when(list(FALSE, TRUE), list("x", NA)), NA_character_) }) test_that("`conditions` inputs can be size zero", { expect_identical( vec_case_when( list(logical(), logical()), list(1, 2) ), numeric() ) expect_snapshot(error = TRUE, { vec_case_when(list(logical()), list(1:2)) }) }) test_that("retains names of `values` inputs", { value1 <- c(x = 1, y = 2) value2 <- c(z = 3, w = 4) out <- vec_case_when( list(c(TRUE, FALSE), c(TRUE, TRUE)), list(value1, value2) ) expect_named(out, c("x", "w")) }) test_that("`values` are cast to their common type", { expect_identical(vec_case_when(list(FALSE, TRUE), list(1, 2L)), 2) expect_identical(vec_case_when(list(FALSE, TRUE), list(1, NA)), NA_real_) expect_snapshot(error = TRUE, { vec_case_when(list(FALSE, TRUE), list(1, "x")) }) }) test_that("`values` must be size 1 or same size as the `conditions`", { expect_identical( vec_case_when( list(c(TRUE, TRUE)), list(1) ), c(1, 1) ) expect_identical( vec_case_when( list(c(TRUE, FALSE), c(TRUE, TRUE)), list(c(1, 2), c(3, 4)) ), c(1, 4) ) expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE, TRUE, TRUE)), list(1:3) ) }) }) test_that("Unhandled `NA` are given a value of `default`", { expect_identical( vec_case_when(list(NA), list(1)), NA_real_ ) expect_identical( vec_case_when(list(NA), list(1), default = 2), 2 ) expect_identical( vec_case_when( list( c(FALSE, NA, TRUE, FALSE), c(NA, FALSE, TRUE, FALSE) ), list( 2, 3 ), default = 4 ), c(4, 4, 2, 4) ) }) test_that("`NA` is overridden by any `TRUE` values", { x <- c(1, 2, NA, 3) expect <- c("one", "not_one", "missing", 
"not_one") # `TRUE` overriding before the `NA` conditions <- list( is.na(x), x == 1 ) values <- list( "missing", "one" ) expect_identical( vec_case_when( conditions, values, default = "not_one" ), expect ) # `TRUE` overriding after the `NA` conditions <- list( x == 1, is.na(x) ) values <- list( "one", "missing" ) expect_identical( vec_case_when( conditions, values, default = "not_one" ), expect ) }) test_that("works when there is a used `default` and no missing values", { expect_identical(vec_case_when(list(c(TRUE, FALSE)), list(1), default = 3:4), c(1, 4)) }) test_that("works when there are missing values but no `default`", { expect_identical(vec_case_when(list(c(TRUE, NA)), list(1)), c(1, NA)) }) test_that("A `NULL` `default` fills in with missing values", { expect_identical( vec_case_when(list(c(TRUE, FALSE, FALSE)), list(1)), c(1, NA, NA) ) }) test_that("`default` fills in all unused slots", { expect_identical( vec_case_when(list(c(TRUE, FALSE, FALSE)), list(1), default = 2), c(1, 2, 2) ) }) test_that("`default` is initialized correctly in the logical / unspecified case", { # i.e. 
`vec_ptype(NA)` is unspecified but the result should be finalized to logical expect_identical(vec_case_when(list(FALSE), list(NA)), NA) }) test_that("`default` can be vectorized, and is sliced to fit as needed", { out <- vec_case_when( list( c(FALSE, TRUE, FALSE, TRUE, FALSE), c(FALSE, TRUE, FALSE, FALSE, TRUE) ), list( 1:5, 6:10 ), default = 11:15 ) expect_identical(out, c(11L, 2L, 13L, 4L, 10L)) }) test_that("`default` must be size 1 or same size as `conditions` (exact same as any other `values` input)", { expect_snapshot(error = TRUE, { vec_case_when(list(FALSE), list(1L), default = 2:3) }) }) test_that("`default` participates in common type determination (exact same as any other `values` input)", { expect_identical(vec_case_when(list(FALSE), list(1L), default = 2), 2) }) test_that("`default` that is an unused logical `NA` can still be cast to `values` ptype", { # Requires that casting happen before recycling, because it recycles # to size zero, resulting in a logical rather than an unspecified. 
expect_identical(vec_case_when(list(TRUE), list("x"), default = NA), "x") }) test_that("`default_arg` can be customized", { expect_snapshot(error = TRUE, { vec_case_when(list(FALSE), list(1L), default = 2:3, default_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(FALSE), list(1L), default = "x", default_arg = "foo") }) }) test_that("`conditions_arg` is validated", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), conditions_arg = 1) }) }) test_that("`values_arg` is validated", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), values_arg = 1) }) }) test_that("`default_arg` is validated", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), default_arg = 1) }) }) test_that("`conditions` must all be the same size", { expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE), TRUE), list(1, 2) ) }) expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2) ) }) }) test_that("`conditions` must be logical (and aren't cast to logical!)", { expect_snapshot(error = TRUE, { vec_case_when(list(1), list(2)) }) # Make sure input numbering is right in the error message! 
expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, 3.5), list(2, 4)) }) }) test_that("`conditions` are allowed to have attributes", { x <- structure(c(FALSE, TRUE), label = "foo") expect_identical(vec_case_when(list(x), list(1), default = 2), c(2, 1)) }) test_that("`conditions` can be classed logicals", { x <- structure(c(FALSE, TRUE), class = "my_logical") expect_identical(vec_case_when(list(x), list(1), default = 2), c(2, 1)) }) test_that("`size` overrides the `conditions` sizes", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), size = 5) }) expect_snapshot(error = TRUE, { vec_case_when( list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2), size = 2 ) }) }) test_that("`ptype` overrides the `values` types", { expect_identical( vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = integer()), 2L ) expect_snapshot(error = TRUE, { vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = character()) }) }) test_that("number of `conditions` and `values` must be the same", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list()) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, TRUE), list(1)) }) }) test_that("can't have empty inputs", { expect_snapshot(error = TRUE, { vec_case_when(list(), list()) }) expect_snapshot(error = TRUE, { vec_case_when(list(), list(), default = 1) }) }) test_that("dots must be empty", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(1), 2) }) }) test_that("`conditions` must be a list", { expect_snapshot(error = TRUE, { vec_case_when(1, list(2)) }) }) test_that("`values` must be a list", { expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), 1) }) }) test_that("named inputs show up in the error message", { expect_snapshot(error = TRUE, { vec_case_when(list(x = 1.5), list(1)) }) expect_snapshot(error = TRUE, { vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(x = 1.5), list(1), conditions_arg = "") }) 
expect_snapshot(error = TRUE, { vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) }) expect_snapshot(error = TRUE, { vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y")) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(NULL)) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(x = NULL)) }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(NULL), values_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(NULL), values_arg = "") }) expect_snapshot(error = TRUE, { vec_case_when(list(TRUE), list(x = NULL), values_arg = "") }) }) dplyr/tests/testthat/test-slice.R0000644000176200001440000004432314525503021016610 0ustar liggesuserstest_that("empty slice drops all rows (#6573)", { df <- tibble(g = c(1, 1, 2), x = 1:3) gdf <- group_by(df, g) rdf <- rowwise(df) expect_identical(slice(df), df[integer(),]) expect_identical(slice(gdf), gdf[integer(),]) expect_identical(slice(rdf), rdf[integer(),]) }) test_that("slicing data.frame yields data.frame", { df <- data.frame(x = 1:3) expect_equal(slice(df, 1), data.frame(x = 1L)) }) test_that("slice keeps positive indices, ignoring out of range (#226)", { gf <- group_by(tibble(g = c(1, 2, 2, 3, 3, 3), id = 1:6), g) out <- slice(gf, 1) expect_equal(out$id, c(1, 2, 4)) out <- slice(gf, 2) expect_equal(out$id, c(3, 5)) }) test_that("slice drops negative indices, ignoring out of range (#3073)", { gf <- group_by(tibble(g = 
c(1, 2, 2, 3, 3, 3), id = 1:6), g) out <- slice(gf, -1) expect_equal(out$id, c(3, 5, 6)) out <- slice(gf, -(1:2)) expect_equal(out$id, 6) }) test_that("slice errors if positive and negative indices mixed", { expect_snapshot(error = TRUE, { slice(tibble(), 1, -1) }) }) test_that("slice ignores 0 and NA (#3313, #1235)", { gf <- group_by(tibble(g = c(1, 2, 2, 3, 3, 3), id = 1:6), g) out <- slice(gf, 0) expect_equal(out$id, integer()) out <- slice(gf, 0, 1) expect_equal(out$id, c(1, 2, 4)) out <- slice(gf, NA) expect_equal(out$id, integer()) out <- slice(gf, NA, -1) expect_equal(out$id, c(3, 5, 6)) }) test_that("slicing with one-column matrix is deprecated", { df <- tibble(g = c(1, 2, 2, 3, 3, 3), id = 1:6) expect_snapshot({ out <- slice(df, matrix(c(1, 3))) }) expect_equal(out$id, c(1, 3)) }) test_that("slice errors if index is not numeric", { expect_snapshot(error = TRUE, { slice(tibble(), "a") }) }) test_that("slice preserves groups iff requested", { gf <- group_by(tibble(g = c(1, 2, 2, 3, 3, 3), id = 1:6), g) out <- slice(gf, 2, 3) expect_equal(group_keys(out), tibble(g = c(2, 3))) expect_equal(group_rows(out), list_of(1, c(2, 3))) out <- slice(gf, 2, 3, .preserve = TRUE) expect_equal(group_keys(out), tibble(g = c(1, 2, 3))) expect_equal(group_rows(out), list_of(integer(), 1, c(2, 3))) }) test_that("slice handles zero-row and zero-column inputs (#1219, #2490)", { df <- tibble(x = numeric()) expect_equal(slice(df, 1), df) df <- tibble(.rows = 10) expect_equal(slice(df, 1), tibble(.rows = 1)) }) test_that("user errors are correctly labelled", { df <- tibble(x = 1:3) expect_snapshot(error = TRUE, { slice(df, 1 + "") slice(group_by(df, x), 1 + "") }) }) test_that("`...` can't be named (#6554)", { df <- tibble(g = 1, x = 1) expect_snapshot(error = TRUE, { slice(df, 1, foo = g) }) }) test_that("slice keeps zero length groups", { df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) 
expect_equal(group_size(slice(df, 1)), c(1, 1, 0) ) }) test_that("slicing retains labels for zero length groups", { df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) expect_equal( ungroup(count(slice(df, 1))), tibble( e = 1, f = factor(1:3), g = c(1, 2, NA), n = c(1L, 1L, 0L) ) ) }) test_that("can group transiently using `.by`", { df <- tibble(g = c(1, 1, 2), x = c(1, 2, 3)) out <- slice(df, n(), .by = g) expect_identical(out$g, c(1, 2)) expect_identical(out$x, c(2, 3)) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping retains bare data.frame class", { df <- tibble(g = c(1, 1, 2), x = c(1, 2, 3)) out <- slice(df, n(), .by = g) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping retains data frame attributes", { # With data.frames or tibbles df <- data.frame(g = c(1, 1, 2), x = c(1, 2, 3)) tbl <- as_tibble(df) attr(df, "foo") <- "bar" attr(tbl, "foo") <- "bar" out <- slice(df, n(), .by = g) expect_identical(attr(out, "foo"), "bar") out <- slice(tbl, n(), .by = g) expect_identical(attr(out, "foo"), "bar") }) test_that("transient grouping orders by first appearance", { df <- tibble(g = c(2, 1, 2, 0), x = c(4, 2, 8, 5)) out <- slice(df, which(x == max(x)), .by = g) expect_identical(out$g, c(2, 1, 0)) expect_identical(out$x, c(8, 2, 5)) }) test_that("can't use `.by` with `.preserve`", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { slice(df, .by = x, .preserve = TRUE) }) }) test_that("catches `.by` with grouped-df", { df <- tibble(x = 1) gdf <- group_by(df, x) expect_snapshot(error = TRUE, { slice(gdf, .by = x) }) }) test_that("catches `.by` with rowwise-df", { df <- tibble(x = 1) rdf <- rowwise(df) expect_snapshot(error = TRUE, { slice(rdf, .by = x) }) }) test_that("catches `by` typo (#6647)", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { slice(df, by = x) }) }) # Slice variants 
---------------------------------------------------------- test_that("slice_helpers() call get_slice_size()", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { slice_head(df, n = "a") slice_tail(df, n = "a") slice_min(df, x, n = "a") slice_max(df, x, n = "a") slice_sample(df, n= "a") }) }) test_that("get_slice_size() validates its inputs", { expect_snapshot(error = TRUE, { get_slice_size(n = 1, prop = 1) get_slice_size(n = "a") get_slice_size(prop = "a") }) }) test_that("get_slice_size() snapshots", { expect_snapshot({ body(get_slice_size(prop = 0)) body(get_slice_size(prop = 0.4)) body(get_slice_size(prop = 2)) body(get_slice_size(prop = 2, allow_outsize = TRUE)) body(get_slice_size(prop = -0.4)) body(get_slice_size(prop = -2)) body(get_slice_size(n = 0)) body(get_slice_size(n = 4)) body(get_slice_size(n = 20)) body(get_slice_size(n = 20, allow_outsize = TRUE)) body(get_slice_size(n = -4)) body(get_slice_size(n = -20)) }) }) test_that("get_slice_size() standardises prop", { expect_equal(get_slice_size(prop = 0)(10), 0) expect_equal(get_slice_size(prop = 0.4)(10), 4) expect_equal(get_slice_size(prop = 2)(10), 10) expect_equal(get_slice_size(prop = 2, allow_outsize = TRUE)(10), 20) expect_equal(get_slice_size(prop = -0.4)(10), 6) expect_equal(get_slice_size(prop = -2)(10), 0) }) test_that("get_slice_size() standardises n", { expect_equal(get_slice_size(n = 0)(10), 0) expect_equal(get_slice_size(n = 4)(10), 4) expect_equal(get_slice_size(n = 20)(10), 10) expect_equal(get_slice_size(n = 20, allow_outsize = TRUE)(10), 20) expect_equal(get_slice_size(n = -4)(10), 6) expect_equal(get_slice_size(n = -20)(10), 0) }) test_that("get_slice_size() rounds prop in the right direction", { expect_equal(get_slice_size(prop = 0.16)(10), 1) expect_equal(get_slice_size(prop = -0.16)(10), 9) }) test_that("n must be an integer", { df <- tibble(x = 1:5) expect_snapshot(slice_head(df, n = 1.1), error = TRUE) }) test_that("functions silently truncate results", { # only test positive n 
because get_slice_size() converts all others df <- tibble(x = 1:5) expect_equal(nrow(slice_head(df, n = 6)), 5) expect_equal(nrow(slice_tail(df, n = 6)), 5) expect_equal(nrow(slice_min(df, x, n = 6)), 5) expect_equal(nrow(slice_max(df, x, n = 6)), 5) expect_equal(nrow(slice_sample(df, n = 6)), 5) }) test_that("slice helpers with n = 0 return no rows", { df <- tibble(x = 1:5) expect_equal(nrow(slice_head(df, n = 0)), 0) expect_equal(nrow(slice_tail(df, n = 0)), 0) expect_equal(nrow(slice_min(df, x, n = 0)), 0) expect_equal(nrow(slice_max(df, x, n = 0)), 0) expect_equal(nrow(slice_sample(df, n = 0)), 0) }) test_that("slice_*() doesn't look for `n` in data (#6089)", { df <- data.frame(x = 1:10, n = 10:1, g = rep(1:2, each = 5)) expect_error(slice_max(df, order_by = n), NA) expect_error(slice_min(df, order_by = n), NA) expect_error(slice_sample(df, weight_by = n, n = 1L), NA) df <- group_by(df, g) expect_error(slice_max(df, order_by = n), NA) expect_error(slice_min(df, order_by = n), NA) expect_error(slice_sample(df, weight_by = n, n = 1L), NA) }) test_that("slice_*() checks that `n=` is explicitly named and ... is empty", { # i.e. 
that every function calls check_slice_dots() df <- data.frame(x = 1:10) expect_snapshot(error = TRUE, { slice_head(df, 5) slice_tail(df, 5) slice_min(df, x, 5) slice_max(df, x, 5) slice_sample(df, 5) }) # And works with namespace prefix (#6946) expect_snapshot(error = TRUE, { dplyr::slice_head(df, 5) dplyr::slice_tail(df, 5) dplyr::slice_min(df, x, 5) dplyr::slice_max(df, x, 5) dplyr::slice_sample(df, 5) }) expect_snapshot(error = TRUE, { slice_head(df, 5, 2) slice_tail(df, 5, 2) slice_min(df, x, 5, 2) slice_max(df, x, 5, 2) slice_sample(df, 5, 2) }) }) test_that("slice_helpers do call slice() and benefit from dispatch (#6084)", { local_methods( slice.noisy = function(.data, ..., .preserve = FALSE) { warning("noisy") NextMethod() } ) nf <- tibble(x = 1:10, g = rep(1:2, each = 5)) %>% group_by(g) class(nf) <- c("noisy", class(nf)) expect_warning(slice(nf, 1:2), "noisy") expect_warning(slice_sample(nf, n = 2), "noisy") expect_warning(slice_head(nf, n = 2), "noisy") expect_warning(slice_tail(nf, n = 2), "noisy") expect_warning(slice_min(nf, x, n = 2), "noisy") expect_warning(slice_max(nf, x, n = 2), "noisy") expect_warning(sample_n(nf, 2), "noisy") expect_warning(sample_frac(nf, .5), "noisy") }) test_that("slice_helper `by` errors use correct error context and correct `by_arg`", { df <- tibble(x = 1) gdf <- group_by(df, x) expect_snapshot(error = TRUE, { slice_head(gdf, n = 1, by = x) slice_tail(gdf, n = 1, by = x) slice_min(gdf, order_by = x, by = x) slice_max(gdf, order_by = x, by = x) slice_sample(gdf, n = 1, by = x) }) }) test_that("slice_helper catches `.by` typo (#6647)", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { slice_head(df, n = 1, .by = x) slice_tail(df, n = 1, .by = x) slice_min(df, order_by = x, .by = x) slice_max(df, order_by = x, .by = x) slice_sample(df, n = 1, .by = x) }) }) # slice_min/slice_max ----------------------------------------------------- test_that("min and max return ties by default", { df <- tibble(id = 1:5, x = c(1, 1, 1, 2, 
2)) expect_equal(slice_min(df, x)$id, c(1, 2, 3)) expect_equal(slice_max(df, x)$id, c(4, 5)) expect_equal(slice_min(df, x, with_ties = FALSE)$id, 1) expect_equal(slice_max(df, x, with_ties = FALSE)$id, 4) }) test_that("min and max reorder results", { df <- data.frame(id = 1:4, x = c(2, 3, 1, 2)) expect_equal(slice_min(df, x, n = 2)$id, c(3, 1, 4)) expect_equal(slice_max(df, x, n = 2)$id, c(2, 1, 4)) expect_equal(slice_min(df, x, n = 2, with_ties = FALSE)$id, c(3, 1)) expect_equal(slice_max(df, x, n = 2, with_ties = FALSE)$id, c(2, 1)) }) test_that("min and max include NAs when appropriate", { df <- tibble(id = 1:3, x = c(1, NA, NA)) expect_equal(slice_min(df, x, n = 1)$id, 1) expect_equal(slice_max(df, x, n = 1)$id, 1) expect_equal(slice_min(df, x, n = 2)$id, c(1, 2, 3)) expect_equal(slice_min(df, x, n = 2, with_ties = FALSE)$id, c(1, 2)) df <- tibble(id = 1:4, x = NA) expect_equal(slice_min(df, x, n = 2, na_rm = TRUE)$id, integer()) expect_equal(slice_max(df, x, n = 2, na_rm = TRUE)$id, integer()) }) test_that("min and max ignore NA's when requested (#4826)", { df <- tibble(id = 1:4, x = c(2, NA, 1, 2)) expect_equal(slice_min(df, x, n = 2, na_rm = TRUE)$id, c(3, 1, 4)) expect_equal(slice_max(df, x, n = 2, na_rm = TRUE)$id, c(1, 4)) # Check with list to confirm use full vctrs support df <- tibble(id = 1:4, x = list(NULL, 1, NULL, NULL)) expect_equal(slice_min(df, x, n = 2, na_rm = TRUE)$id, 2) expect_equal(slice_max(df, x, n = 2, na_rm = TRUE)$id, 2) # Drop when any element is missing df <- tibble(id = 1:3, a = c(1, 2, NA), b = c(2, NA, NA)) expect_equal(slice_min(df, tibble(a, b), n = 3, na_rm = TRUE)$id, 1) expect_equal(slice_max(df, tibble(a, b), n = 3, na_rm = TRUE)$id, 1) }) test_that("slice_min/max() count from back with negative n/prop", { df <- tibble(id = 1:4, x = c(2, 3, 1, 4)) expect_equal(slice_min(df, x, n = -1), slice_min(df, x, n = 3)) expect_equal(slice_max(df, x, n = -1), slice_max(df, x, n = 3)) # and can be larger than group size 
expect_equal(slice_min(df, x, n = -10), df[0, ]) expect_equal(slice_max(df, x, n = -10), df[0, ]) }) test_that("slice_min/max() can order by multiple variables (#6176)", { df <- tibble(id = 1:4, x = 1, y = c(1, 4, 2, 3)) expect_equal(slice_min(df, tibble(x, y), n = 1)$id, 1) expect_equal(slice_max(df, tibble(x, y), n = 1)$id, 2) }) test_that("slice_min/max() work with `by`", { df <- tibble(g = c(2, 2, 1, 1), x = c(1, 2, 3, 1)) expect_identical(slice_min(df, x, by = g), df[c(1, 4),]) expect_identical(slice_max(df, x, by = g), df[c(2, 3),]) }) test_that("slice_min/max() inject `with_ties` and `na_rm` (#6725)", { # So columns named `with_ties` and `na_rm` don't mask those arguments df <- tibble(x = c(1, 1, 2, 2), with_ties = 1:4) expect_identical(slice_min(df, x, n = 1), vec_slice(df, 1:2)) expect_identical(slice_min(df, x, n = 1, with_ties = FALSE), vec_slice(df, 1)) expect_identical(slice_max(df, x, n = 1), vec_slice(df, 3:4)) expect_identical(slice_max(df, x, n = 1, with_ties = FALSE), vec_slice(df, 3)) df <- tibble(x = c(1, NA), na_rm = 1:2) expect_identical(slice_min(df, x, n = 2), df) expect_identical(slice_min(df, x, n = 2, na_rm = TRUE), vec_slice(df, 1)) expect_identical(slice_max(df, x, n = 2), df) expect_identical(slice_max(df, x, n = 2, na_rm = TRUE), vec_slice(df, 1)) }) test_that("slice_min/max() check size of `order_by=` (#5922)", { expect_snapshot(error = TRUE, { slice_min(data.frame(x = 1:10), 1:6) slice_max(data.frame(x = 1:10), 1:6) }) }) test_that("slice_min/max() validate simple arguments", { expect_snapshot(error = TRUE, { slice_min(data.frame(x = 1:10)) slice_max(data.frame(x = 1:10)) slice_min(data.frame(x = 1:10), x, with_ties = 1) slice_max(data.frame(x = 1:10), x, with_ties = 1) slice_min(data.frame(x = 1:10), x, na_rm = 1) slice_max(data.frame(x = 1:10), x, na_rm = 1) }) }) # slice_sample ------------------------------------------------------------ test_that("slice_sample() respects weight_by and replaces", { df <- tibble(x = 1:100, wt = 
c(1, rep(0, 99))) out <- slice_sample(df, n = 1, weight_by = wt) expect_equal(out$x, 1) out <- slice_sample(df, n = 2, weight_by = wt, replace = TRUE) expect_equal(out$x, c(1, 1)) }) test_that("slice_sample() can increase rows iff replace = TRUE", { df <- tibble(x = 1:10) expect_equal(nrow(slice_sample(df, n = 20, replace = FALSE)), 10) expect_equal(nrow(slice_sample(df, n = 20, replace = TRUE)), 20) }) test_that("slice_sample() checks size of `weight_by=` (#5922)", { df <- tibble(x = 1:10) expect_snapshot(slice_sample(df, n = 2, weight_by = 1:6), error = TRUE) }) test_that("slice_sample() works with zero-row data frame (#5729)", { df <- tibble(x = character(), w = numeric()) out <- slice_sample(df, prop = 0.5, weight_by = w) expect_equal(out, df) }) test_that("`slice_sample()` validates `replace`", { df <- tibble() expect_snapshot(error = TRUE, { slice_sample(df, replace = 1) slice_sample(df, replace = NA) }) }) test_that("slice_sample() injects `replace` (#6725)", { # So a column named `replace` doesn't mask that argument df <- tibble(replace = 1) expect_identical(slice_sample(df, n = 2), df) expect_identical(slice_sample(df, n = 2, replace = TRUE), vec_slice(df, c(1, 1))) }) test_that("slice_sample() handles positive n= and prop=", { gf <- group_by(tibble(a = 1, b = 1), a) expect_equal(slice_sample(gf, n = 3, replace = TRUE), gf[c(1, 1, 1), ]) expect_equal(slice_sample(gf, prop = 3, replace = TRUE), gf[c(1, 1, 1), ]) }) test_that("slice_sample() handles negative n= and prop= (#6402)", { df <- tibble(a = 1:2) expect_equal(nrow(slice_sample(df, n = -1)), 1) expect_equal(nrow(slice_sample(df, prop = -0.5)), 1) # even if larger than n expect_equal(nrow(slice_sample(df, n = -3)), 0) expect_equal(nrow(slice_sample(df, prop = -2)), 0) }) test_that("slice_sample() works with `by`", { df <- tibble(g = c(2, 2, 2, 1), x = c(1, 2, 3, 1)) expect_identical(slice_sample(df, n = 2, by = g)$g, c(2, 2, 1)) }) # slice_head/slice_tail 
--------------------------------------------------- test_that("slice_head/slice_tail keep positive values", { gf <- group_by(tibble(g = c(1, 2, 2, 3, 3, 3), id = 1:6), g) expect_equal(slice_head(gf, n = 1)$id, c(1, 2, 4)) expect_equal(slice_head(gf, n = 2)$id, c(1, 2, 3, 4, 5)) expect_equal(slice_tail(gf, n = 1)$id, c(1, 3, 6)) expect_equal(slice_tail(gf, n = 2)$id, c(1, 2, 3, 5, 6)) }) test_that("slice_head/tail() count from back with negative n/prop", { df <- tibble(id = 1:4, x = c(2, 3, 1, 4)) expect_equal(slice_head(df, n = -1), slice_head(df, n = 3)) expect_equal(slice_tail(df, n = -1), slice_tail(df, n = 3)) # and can be larger than group size expect_equal(slice_head(df, n = -10), df[0, ]) expect_equal(slice_tail(df, n = -10), df[0, ]) }) test_that("slice_head/slice_tail drop from opposite end when n/prop negative", { gf <- group_by(tibble(g = c(1, 2, 2, 3, 3, 3), id = 1:6), g) expect_equal(slice_head(gf, n = -1)$id, c(2, 4, 5)) expect_equal(slice_head(gf, n = -2)$id, 4) expect_equal(slice_tail(gf, n = -1)$id, c(3, 5, 6)) expect_equal(slice_tail(gf, n = -2)$id, 6) }) test_that("slice_head/slice_tail handle infinite n/prop", { df <- tibble(x = 1) expect_identical(slice_head(df, n = Inf), df) expect_identical(slice_tail(df, n = Inf), df) expect_identical(slice_head(df, n = -Inf), df[0, ]) expect_identical(slice_tail(df, n = -Inf), df[0, ]) expect_identical(slice_head(df, prop = Inf), df) expect_identical(slice_tail(df, prop = Inf), df) expect_identical(slice_head(df, prop = -Inf), df[0, ]) expect_identical(slice_tail(df, prop = -Inf), df[0, ]) }) test_that("slice_head/slice_tail work with `by`", { df <- tibble(g = c(2, 2, 2, 1), x = c(1, 2, 3, 1)) expect_identical(slice_head(df, n = 2, by = g), df[c(1, 2, 4),]) expect_identical(slice_tail(df, n = 2, by = g), df[c(2, 3, 4),]) }) dplyr/tests/testthat/helper-encoding.R0000644000176200001440000000360514366556340017614 0ustar liggesusersget_lang_strings <- function() { lang_strings <- c( de = "Gl\u00fcck", cn = 
"\u5e78\u798f", ru = "\u0441\u0447\u0430\u0441\u0442\u044c\u0435", ko = "\ud589\ubcf5" ) native_lang_strings <- enc2native(lang_strings) same <- (lang_strings == native_lang_strings) list( same = lang_strings[same], different = lang_strings[!same] ) } get_native_lang_string <- function() { lang_strings <- get_lang_strings() if (length(lang_strings$same) == 0) testthat::skip("No native language string available") lang_strings$same[[1L]] } get_alien_lang_string <- function() { lang_strings <- get_lang_strings() if (length(lang_strings$different) == 0) testthat::skip("No alien language string available") lang_strings$different[[1L]] } has_locale <- function(locale, category) { original <- Sys.getlocale(category = category) on.exit(Sys.setlocale(category = category, locale = original), add = TRUE) tryCatch( expr = { Sys.setlocale(category = category, locale = locale) TRUE }, warning = function(w) FALSE, error = function(e) FALSE ) } has_collate_locale <- function(locale) { has_locale(locale = locale, category = "LC_COLLATE") } has_ctype_locale <- function(enc) { has_locale(locale = enc, category = "LC_CTYPE") } non_utf8_encoding <- function(enc = NULL) { if (!l10n_info()$`UTF-8`) { return(Sys.getlocale("LC_CTYPE")) } enc <- enc %||% c( "en_US.ISO8859-1", "en_US.ISO8859-15", "fr_CH.ISO8859-1", "fr_CH.ISO8859-15" ) available <- vapply(enc, has_ctype_locale, logical(1)) if (any(available)) { enc[available][1] } else { NULL } } local_non_utf8_encoding <- function(enc = NULL, env = parent.frame()) { non_utf8 <- non_utf8_encoding(enc) if (is.null(non_utf8)) { skip("Can't set a non-UTF-8 encoding") } else { withr::local_locale(c(LC_CTYPE = non_utf8), .local_envir = env) } } dplyr/tests/testthat/test-lead-lag.R0000644000176200001440000001047314366556340017175 0ustar liggesuserstest_that("`lead()` / `lag()` get the direction right", { expect_identical(lead(1:5), c(2:5, NA)) expect_identical(lag(1:5), c(NA, 1:4)) }) test_that("If n = 0, lead and lag return x", { x <- c(10L, 8L, 
1L, 3L, 6L, 9L, 4L, 2L, 5L, 7L) expect_equal(lead(x, 0), x) expect_equal(lag(x, 0), x) }) test_that("If n = length(x), returns all missing", { x <- c(10L, 8L, 1L, 3L, 6L, 9L, 4L, 2L, 5L, 7L) expect_equal(lead(x, length(x)), rep(NA_integer_, length(x))) expect_equal(lag(x, length(x)), rep(NA_integer_, length(x))) }) test_that("`lag()` gives informative error for objects", { expect_snapshot(error = TRUE, { lag(ts(1:10)) }) }) test_that("lead() and lag() work for matrices (#5028)", { m <- matrix(1:6, ncol = 2) expect_equal(lag(m, 1), matrix(c(NA_integer_, 1L, 2L, NA_integer_, 4L, 5L), ncol = 2)) expect_equal(lag(m, 1, default = NA), matrix(c(NA_integer_, 1L, 2L, NA_integer_, 4L, 5L), ncol= 2)) expect_equal(lead(m, 1), matrix(c(2L, 3L, NA_integer_, 5L, 6L, NA_integer_), ncol = 2)) expect_equal(lead(m, 1, default = NA), matrix(c(2L, 3L, NA_integer_, 5L, 6L, NA_integer_), ncol = 2)) }) test_that("lead and lag preserve factors", { x <- factor(c("a", "b", "c")) expect_equal(levels(lead(x)), c("a", "b", "c")) expect_equal(levels(lag(x)), c("a", "b", "c")) }) test_that("lead and lag preserves dates and times", { x <- as.Date("2013-01-01") + 1:3 y <- as.POSIXct(x) expect_s3_class(lead(x), "Date") expect_s3_class(lag(x), "Date") expect_s3_class(lead(y), "POSIXct") expect_s3_class(lag(y), "POSIXct") }) test_that("`lead()` / `lag()` validate `n`", { expect_snapshot(error = TRUE, { lead(1:5, n = 1:2) lead(1:5, -1) }) expect_snapshot(error = TRUE, { lag(1:5, n = 1:2) lag(1:5, -1) }) }) test_that("`lead()` / `lag()` check for empty dots", { expect_snapshot(error = TRUE, { lead(1:5, deault = 1) }) expect_snapshot(error = TRUE, { lag(1:5, deault = 1) }) }) test_that("`lead()` / `lag()` require that `x` is a vector", { expect_snapshot(error = TRUE, { lead(environment()) }) expect_snapshot(error = TRUE, { lag(environment()) }) }) # ------------------------------------------------------------------------------ # shift() test_that("works with all 4 combinations of with/without `default` 
and lag/lead", { x <- 1:5 expect_identical(shift(x, n = 2L), c(NA, NA, 1L, 2L, 3L)) expect_identical(shift(x, n = 2L, default = 0L), c(0L, 0L, 1L, 2L, 3L)) expect_identical(shift(x, n = -2L), c(3L, 4L, 5L, NA, NA)) expect_identical(shift(x, n = -2L, default = 0L), c(3L, 4L, 5L, 0L, 0L)) }) test_that("works with size 0 input", { x <- integer() expect_identical(shift(x, n = 2L), x) expect_identical(shift(x, n = 2L, default = 3L), x) expect_identical(shift(x, n = -2L), x) expect_identical(shift(x, n = -2L, default = 3L), x) }) test_that("works with `n = 0` with and without `default`", { x <- 1:5 expect_identical(shift(x, n = 0L), x) expect_identical(shift(x, n = 0L, default = -1L), x) x <- integer() expect_identical(shift(x, n = 0L), x) expect_identical(shift(x, n = 0L, default = -1L), x) }) test_that("works with data frames", { df <- tibble(a = 1:3, b = letters[1:3]) expect_identical(shift(df, n = 1), vec_slice(df, c(NA, 1, 2))) expect_identical(shift(df, n = -1), vec_slice(df, c(2, 3, NA))) default <- tibble(a = 0L, b = "") expect_identical( shift(df, n = 2, default = default), vec_c(default, default, vec_slice(df, 1)) ) }) test_that("is affected by `order_by`", { x <- 1:5 order_by <- c(2, 3, 2, 1, 5) expect_identical( shift(x, n = 1, order_by = order_by), c(4L, 3L, 1L, NA, 2L) ) expect_identical( shift(x, n = -2, order_by = order_by), c(2L, NA, 5L, 3L, NA) ) }) test_that("`default` is cast to the type of `x` (#6330)", { expect_identical(shift(1L, default = 2), 2L) expect_snapshot(error = TRUE, { shift(1L, default = 1.5) }) }) test_that("`default` must be size 1 (#5641)", { expect_snapshot(error = TRUE, { shift(1:5, default = 1:2) }) expect_snapshot(error = TRUE, { shift(1:5, default = integer()) }) }) test_that("`n` is validated", { expect_snapshot(error = TRUE, { shift(1, n = 1:2) }) }) test_that("`order_by` must be the same size as `x`", { expect_snapshot(error = TRUE, { shift(1:5, order_by = 1:4) }) }) 
dplyr/tests/testthat/test-order-by.R0000644000176200001440000000215314366556340017246 0ustar liggesuserstest_that("order_by() gives useful error messages", { expect_snapshot({ (expect_error(order_by(mtcars, 10))) (expect_error(order_by(mtcars, cyl))) }) }) test_that("`with_order()` works with data frame `order_by` (#6334)", { x <- 1:3 order_by <- tibble(a = c(1, 1, 2), b = c(2, 1, 1)) expect_identical(with_order(order_by, lag, x), c(2L, NA, 1L)) }) test_that("`with_order()` requires `order_by` and `x` to be the same size", { expect_snapshot(error = TRUE, { with_order(1:2, identity, 1:3) }) }) test_that("order_by() returns correct value", { expected <- int(15, 14, 12, 9, 5) expect_identical(order_by(5:1, cumsum(1:5)), expected) x <- 5:1 y <- 1:5 expect_identical(order_by(x, cumsum(y)), expected) }) test_that("order_by() works in arbitrary envs (#2297)", { env <- child_env("base") expect_equal( with_env(env, dplyr::order_by(5:1, cumsum(1:5))), rev(cumsum(rev(1:5))) ) expect_equal( order_by(5:1, cumsum(1:5)), rev(cumsum(rev(1:5))) ) }) test_that("order_by() give meaningful errors", { expect_snapshot({ (expect_error(order_by(NULL, 1L))) }) }) dplyr/tests/testthat/test-sample.R0000644000176200001440000000750214366556340017007 0ustar liggesusers# Basic behaviour ------------------------------------------------------------- test_that("sample preserves class", { expect_s3_class(sample_n(mtcars, 1), "data.frame") expect_s3_class(sample_n(as_tibble(mtcars), 1), "tbl_df") expect_s3_class(sample_frac(mtcars, 1), "data.frame") expect_s3_class(sample_frac(as_tibble(mtcars), 1), "tbl_df") }) # Ungrouped -------------------------------------------------------------------- test_that("sample respects weight", { df <- data.frame(x = 1:2, y = c(0, 1)) expect_equal(sample_n(df, 1, weight = y)$x, 2) expect_equal(sample_frac(df, 0.5, weight = y)$x, 2) }) test_that("slice does not evaluate the expression in empty groups (#1438)", { res <- mtcars %>% group_by(cyl) %>% filter(cyl==6) %>% 
slice(1:2) expect_equal(nrow(res), 2L) expect_error( res <- mtcars %>% group_by(cyl) %>% filter(cyl==6) %>% sample_n(size=3), NA ) expect_equal(nrow(res), 3L) }) # Grouped ---------------------------------------------------------------------- test_that("sampling grouped tbl samples each group", { sampled <- mtcars %>% group_by(cyl) %>% sample_n(2) expect_s3_class(sampled, "grouped_df") expect_equal(group_vars(sampled), "cyl") expect_equal(nrow(sampled), 6) expect_equal(map_int(group_rows(sampled), length), c(2,2,2)) }) test_that("grouped sample respects weight", { df2 <- tibble( x = rep(1:2, 100), y = rep(c(0, 1), 100), g = rep(1:2, each = 100) ) grp <- df2 %>% group_by(g) expect_equal(sample_n(grp, 1, weight = y)$x, c(2, 2)) expect_equal(sample_frac(grp, 0.5, weight = y)$x, rep(2, nrow(df2) / 2)) }) test_that("grouped sample accepts NULL weight from variable (for saeSim)", { df <- tibble( x = rep(1:2, 10), y = rep(c(0, 1), 10), g = rep(1:2, each = 10) ) weight <- NULL expect_no_error(sample_n(df, nrow(df), weight = weight)) expect_no_error(sample_frac(df, weight = weight)) grp <- df %>% group_by(g) expect_no_error(sample_n(grp, nrow(df) / 2, weight = weight)) expect_no_error(sample_frac(grp, weight = weight)) }) test_that("sample_n and sample_frac can call n() (#3413)", { df <- tibble( x = rep(1:2, 10), y = rep(c(0, 1), 10), g = rep(1:2, each = 10) ) gdf <- group_by(df, g) expect_equal(nrow(sample_n(df, n())), nrow(df)) expect_equal(nrow(sample_n(gdf, n())), nrow(gdf)) expect_equal(nrow(sample_n(df, n() - 2L)), nrow(df) - 2) expect_equal(nrow(sample_n(gdf, n() - 2L)), nrow(df) - 4) }) test_that("sample_n and sample_frac handles lazy grouped data frames (#3380)", { df1 <- data.frame(x = 1:10, y = rep(1:2, each=5)) df2 <- data.frame(x = 6:15, z = 1:10) res <- df1 %>% group_by(y) %>% anti_join(df2, by="x") %>% sample_n(1) expect_equal(nrow(res), 1L) res <- df1 %>% group_by(y) %>% anti_join(df2, by="x") %>% sample_frac(0.2) expect_equal(nrow(res), 1L) }) # Errors 
--------------------------------------------

test_that("sample_*() gives meaningful error messages", {
  expect_snapshot({
    df2 <- tibble(
      x = rep(1:2, 100),
      y = rep(c(0, 1), 100),
      g = rep(1:2, each = 100)
    )
    grp <- df2 %>% group_by(g)

    # base R error messages
    (expect_error(
      sample_n(grp, nrow(df2) / 2, weight = y)
    ))
    (expect_error(
      sample_frac(grp, 1, weight = y)
    ))

    # can't sample more values than obs (without replacement)
    (expect_error(
      mtcars %>% group_by(cyl) %>% sample_n(10)
    ))

    # unknown type
    (expect_error(
      sample_n(list())
    ))
    (expect_error(
      sample_frac(list())
    ))

    "# respects weight"
    df <- data.frame(x = 1:2, y = c(0, 1))
    (expect_error(
      sample_n(df, 2, weight = y)
    ))
    (expect_error(
      sample_frac(df, 2)
    ))
    (expect_error(
      sample_frac(df %>% group_by(y), 2)
    ))
    (expect_error(
      sample_frac(df, 1, weight = y)
    ))
  })
})
dplyr/tests/testthat/test-deprec-do.R0000644000176200001440000001377014406402754017366 0ustar liggesusers
# Grouped data frames ----------------------------------------------------------

df <- data.frame(
  g = c(1, 2, 2, 3, 3, 3),
  x = 1:6,
  y = 6:1
) %>%
  group_by(g)

test_that("unnamed results bound together by row", {
  first <- df %>% do(head(., 1))
  expect_equal(nrow(first), 3)
  expect_equal(first$g, 1:3)
  expect_equal(first$x, c(1, 2, 4))
})

test_that("named argument become list columns", {
  out <- df %>% do(nrow = nrow(.), ncol = ncol(.))
  expect_equal(out$nrow, list(1, 2, 3))
  # includes grouping columns
  expect_equal(out$ncol, list(3, 3, 3))
})

test_that("multiple outputs can access data (#2998)", {
  out <- do(tibble(a = 1), g = nrow(.), h = nrow(.))
  expect_equal(names(out), c("g", "h"))
  expect_equal(out$g, list(1L))
  expect_equal(out$h, list(1L))
})

test_that("columns in output override columns in input", {
  out <- df %>% do(data.frame(g = 1))
  expect_equal(names(out), "g")
  expect_equal(out$g, c(1, 1, 1))
})

test_that("empty results preserved (#597)", {
  blankdf <- function(x) data.frame(blank = numeric(0))
  dat <- data.frame(a = 1:2, b = factor(1:2))
  expect_equal(
    dat
%>% group_by(b, .drop = FALSE) %>% do(blankdf(.)) %>% ungroup(), tibble(b = factor(integer(), levels = 1:2), blank = numeric()) ) }) test_that("empty inputs give empty outputs (#597)", { out <- data.frame(a = numeric(), b = factor()) %>% group_by(b, .drop = FALSE) %>% do(data.frame()) expect_equal(out, data.frame(b = factor()) %>% group_by(b, .drop = FALSE)) out <- data.frame(a = numeric(), b = character()) %>% group_by(b, .drop = FALSE) %>% do(data.frame()) expect_equal(out, data.frame(b = character()) %>% group_by(b, .drop = FALSE)) }) test_that("grouped do evaluates args in correct environment", { a <- 10 f <- function(a) { mtcars %>% group_by(cyl) %>% do(a = a) } expect_equal(f(100)$a, list(100, 100, 100)) }) # Ungrouped data frames -------------------------------------------------------- test_that("ungrouped data frame with unnamed argument returns data frame", { out <- mtcars %>% do(head(.)) expect_s3_class(out, "data.frame") expect_equal(dim(out), c(6, 11)) }) test_that("ungrouped data frame with named argument returns list data frame", { out <- mtcars %>% do(x = 1, y = 2:10) expect_s3_class(out, "tbl_df") expect_equal(out$x, list(1)) expect_equal(out$y, list(2:10)) }) test_that("ungrouped do evaluates args in correct environment", { a <- 10 f <- function(a) { mtcars %>% do(a = a) } expect_equal(f(100)$a, list(100)) }) # Rowwise data frames ---------------------------------------------------------- test_that("can do on rowwise dataframe", { out <- mtcars %>% rowwise() %>% do(x = 1) exp <- tibble(x =rep(list(1), nrow(mtcars))) %>% rowwise() expect_identical(out, exp) }) # Zero row inputs -------------------------------------------------------------- test_that("empty data frames give consistent outputs", { dat <- tibble(x = numeric(0), g = character(0)) grp <- dat %>% group_by(g) emt <- grp %>% filter(FALSE) dat %>% do(data.frame()) %>% vapply(pillar::type_sum, character(1)) %>% length() %>% expect_equal(0) dat %>% do(data.frame(y = integer(0))) %>% 
vapply(pillar::type_sum, character(1)) %>% expect_equal(c(y = "int")) dat %>% do(data.frame(.)) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr")) dat %>% do(data.frame(., y = integer(0))) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr", y = "int")) dat %>% do(y = ncol(.)) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(y = "list")) # Grouped data frame should have same col types as ungrouped, with addition # of grouping variable grp %>% do(data.frame()) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(g = "chr")) grp %>% do(data.frame(y = integer(0))) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "int")) grp %>% do(data.frame(.)) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr")) grp %>% do(data.frame(., y = integer(0))) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr", y = "int")) grp %>% do(y = ncol(.)) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "list")) # A empty grouped dataset should have same types as grp emt %>% do(data.frame()) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(g = "chr")) emt %>% do(data.frame(y = integer(0))) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "int")) emt %>% do(data.frame(.)) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr")) emt %>% do(data.frame(., y = integer(0))) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(x = "dbl", g = "chr", y = "int")) emt %>% do(y = ncol(.)) %>% vapply(pillar::type_sum, character(1)) %>% expect_equal(c(g = "chr", y = "list")) }) test_that("handling of empty data frames in do", { blankdf <- function(x) data.frame(blank = numeric(0)) dat <- data.frame(a = 1:2, b = factor(1:2)) res <- dat %>% group_by(b, .drop = FALSE) %>% do(blankdf(.)) expect_equal(names(res), c("b", "blank")) }) 
test_that("do() does not retain .drop attribute (#4176)", {
  res <- iris %>%
    group_by(Species) %>%
    do(data.frame(n=1))
  expect_null(attr(res, ".drop", exact = TRUE))
})

# Errors --------------------------------------------

test_that("do() gives meaningful error messages", {
  df <- data.frame(
    g = c(1, 2, 2, 3, 3, 3),
    x = 1:6,
    y = 6:1
  ) %>%
    group_by(g)

  expect_snapshot({
    (expect_error(df %>% do(head, tail)))

    # unnamed elements must return data frames
    (expect_error(df %>% ungroup() %>% do(1)))
    (expect_error(df %>% do(1)))
    (expect_error(df %>% do("a")))

    # can't use both named and unnamed args
    (expect_error(df %>% do(x = 1, 2)))
  })
})
dplyr/tests/testthat/test-colwise-filter.R0000644000176200001440000000523714366556340020461 0ustar liggesusers
test_that("filter_if()", {
  expect_identical(nrow(filter_if(mtcars, is_integerish, all_vars(. > 1))), 0L)
  expect_identical(nrow(filter_if(mtcars, is_integerish, all_vars(. > 0))), 7L)
})

test_that("filter_at()", {
  sepal_large <- filter_at(iris, vars(starts_with("Sepal")), all_vars(. > 4))
  sepal_large_expected <- filter(iris, Sepal.Length > 4, Sepal.Width > 4)
  expect_equal(sepal_large, sepal_large_expected)
})

test_that("filter_all()", {
  expect_identical(filter_all(mtcars, any_vars(. > 200))$disp, mtcars$disp[mtcars$disp > 200])
})

test_that("filter_at can filter by grouping variables (#3351, #3480)", {
  tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>%
    group_by(gr1)

  expect_identical(
    filter_at(tbl, vars(gr1), all_vars(. > 1)),
    filter(tbl, gr1 > 1)
  )
})

test_that("filter_if and filter_all includes grouping variables (#3351, #3480)", {
  tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>%
    group_by(gr1)

  res <- filter_all(tbl, all_vars(. > 1))
  expect_true(all(res$gr1 > 1))

  res <- filter_if(tbl, is.integer, all_vars(.
> 1)) expect_true(all(res$gr1 > 1)) }) test_that("can supply functions to scoped filters", { exp <- as.list(mtcars[c(8, 9, 21), ]) out <- mtcars %>% filter_at(c("cyl", "am"), ~ .x == 4 | .x == 0) expect_identical(as.list(out), exp) out <- mtcars %>% filter_at(c("cyl", "am"), function(.x) .x == 4 | .x == 0) expect_identical(as.list(out), exp) }) test_that("colwise filter support .data$. in the quosure versions", { expect_identical( filter_if(iris, is.numeric, any_vars(.data$. > 4)), filter_if(iris, is.numeric, any_vars(. > 4)) ) expect_identical( filter_all(select(iris, -Species), any_vars(.data$. > 4)), filter_all(select(iris, -Species), any_vars(. > 4)) ) expect_identical( filter_at(iris, vars(contains(".")), any_vars(.data$. > 4)), filter_at(iris, vars(contains(".")), any_vars(. > 4)) ) }) test_that("all_exprs() creates intersection", { expect_identical(all_exprs(am == 1), quo(am == 1)) quo <- set_env(quo((!!quo(cyl == 2)) & (!!quo(am == 1))), base_env()) expect_identical(all_exprs(cyl == 2, am == 1), quo) }) test_that("any_exprs() creates union", { expect_identical(any_exprs(am == 1), quo(am == 1)) quo <- set_env(quo((!!quo(cyl == 2)) | (!!quo(am == 1))), base_env()) expect_identical(any_exprs(cyl == 2, am == 1), quo) }) # Errors ------------------------------------------------------------------ test_that("colwise filter() give meaningful errors", { expect_snapshot({ (expect_error(filter_if(mtcars, is_character, all_vars(. > 0)))) (expect_error(filter_all(mtcars, list(~ . 
> 0)))) }) }) dplyr/tests/testthat/test-case-match.R0000644000176200001440000000321714366556340017532 0ustar liggesuserstest_that("LHS can match multiple values", { expect_equal(case_match(1, 1:2 ~ "x"), "x") }) test_that("LHS can match special values", { expect_equal(case_match(NA, NA ~ "x"), "x") expect_equal(case_match(NaN, NaN ~ "x"), "x") }) test_that("RHS is recycled to match x", { x <- 1:3 expect_equal(case_match(x, c(1, 3) ~ x * 2), c(2, NA, 6)) }) test_that("`NULL` values in `...` are dropped", { expect_identical( case_match(1:2, 1 ~ "a", NULL, 2 ~ "b", NULL), c("a", "b") ) }) test_that("requires at least one condition", { expect_snapshot(error = TRUE, { case_match(1) }) expect_snapshot(error = TRUE, { case_match(1, NULL) }) }) test_that("passes through `.default` correctly", { expect_identical(case_match(1, 3 ~ 1, .default = 2), 2) expect_identical(case_match(1:5, 6 ~ 1, .default = 2), rep(2, 5)) expect_identical(case_match(1:5, 6 ~ 1:5, .default = 2:6), 2:6) }) test_that("`.default` is part of common type computation", { expect_identical(case_match(1, 1 ~ 1L, .default = 2), 1) expect_snapshot(error = TRUE, { case_match(1, 1 ~ 1L, .default = "x") }) }) test_that("passes through `.ptype` correctly", { expect_identical(case_match(1, 1 ~ 1, .ptype = integer()), 1L) }) test_that("`NULL` formula element throws meaningful error", { expect_snapshot(error = TRUE, { case_match(1, 1 ~ NULL) }) expect_snapshot(error = TRUE, { case_match(1, NULL ~ 1) }) }) test_that("throws chained errors when formula evaluation fails", { expect_snapshot(error = TRUE, { case_match(1, 1 ~ 2, 3 ~ stop("oh no!")) }) expect_snapshot(error = TRUE, { case_match(1, 1 ~ 2, stop("oh no!") ~ 4) }) }) dplyr/tests/testthat/test-across.R0000644000176200001440000011240014525503021016773 0ustar liggesusers# across ------------------------------------------------------------------ test_that("across() works on one column data.frame", { df <- data.frame(x = 1) out <- df %>% mutate(across(everything(), 
identity)) expect_equal(out, df) }) test_that("across() does not select grouping variables", { df <- data.frame(g = 1, x = 1) out <- df %>% group_by(g) %>% summarise(x = across(everything(), identity)) %>% pull() expect_equal(out, tibble(x = 1)) }) test_that("across() correctly names output columns", { gf <- tibble(x = 1, y = 2, z = 3, s = "") %>% group_by(x) expect_named( summarise(gf, across(everything(), identity)), c("x", "y", "z", "s") ) expect_named( summarise(gf, across(everything(), identity, .names = "id_{.col}")), c("x", "id_y", "id_z", "id_s") ) expect_named( summarise(gf, across(where(is.numeric), mean)), c("x", "y", "z") ) expect_named( summarise(gf, across(where(is.numeric), mean, .names = "mean_{.col}")), c("x", "mean_y", "mean_z") ) expect_named( summarise(gf, across(where(is.numeric), list(mean = mean, sum = sum))), c("x", "y_mean", "y_sum", "z_mean", "z_sum") ) expect_named( summarise(gf, across(where(is.numeric), list(mean = mean, sum))), c("x", "y_mean", "y_2", "z_mean", "z_2") ) expect_named( summarise(gf, across(where(is.numeric), list(mean, sum = sum))), c("x", "y_1", "y_sum", "z_1", "z_sum") ) expect_named( summarise(gf, across(where(is.numeric), list(mean, sum))), c("x", "y_1", "y_2", "z_1", "z_2") ) expect_named( summarise(gf, across(where(is.numeric), list(mean = mean, sum = sum), .names = "{.fn}_{.col}")), c("x", "mean_y", "sum_y", "mean_z", "sum_z") ) }) test_that("across(.unpack =) can unpack data frame columns", { fn1 <- function(x) { tibble(a = x, b = x + 1) } fn2 <- function(x) { tibble(c = -x, d = x - 1) } df <- tibble(x = 1:2, y = 3:4) out <- mutate(df, across(x:y, list(one = fn1, two = fn2), .unpack = TRUE)) expect <- tibble( x = 1:2, y = 3:4, x_one_a = x, x_one_b = x + 1, x_two_c = -x, x_two_d = x - 1, y_one_a = y, y_one_b = y + 1, y_two_c = -y, y_two_d = y - 1 ) expect_identical(out, expect) }) test_that("across(.unpack =) allows a glue specification for `.unpack`", { fn <- function(x) { tibble(a = x, b = x + 1) } df <- 
tibble(x = 1) out <- mutate(df, across(x, fn, .unpack = "{outer}.{inner}")) expect_named(out, c("x", "x.a", "x.b")) # Can use variables from caller env out <- local({ name <- "name" mutate(df, across(x, fn, .unpack = "{name}.{inner}")) }) expect_named(out, c("x", "name.a", "name.b")) }) test_that("across(.unpack =) skips unpacking non-df-cols", { fn <- function(x) { tibble(a = x, b = x + 1) } df <- tibble(x = 1) out <- mutate(df, across(x, list(fn = fn, double = ~.x * 2), .unpack = TRUE)) expect <- tibble(x = 1, x_fn_a = 1, x_fn_b = 2, x_double = 2) expect_identical(out, expect) }) test_that("across(.unpack =) uses the result of `.names` as `{outer}`", { fn <- function(x) { tibble(a = x, b = x + 1) } df <- tibble(x = 1, y = 2) out <- df %>% mutate( across(x:y, list(f = fn), .names = "{.col}.{.fn}", .unpack = "{inner}.{outer}") ) expect_named(out, c("x", "y", "a.x.f", "b.x.f", "a.y.f", "b.y.f")) }) test_that("across(.unpack =) errors if the unpacked data frame has non-unique names", { fn <- function(x) { tibble(a = x, b = x) } df <- tibble(x = 1, y = 2) expect_snapshot(error = TRUE, { mutate(df, across(x:y, fn, .unpack = "{outer}")) }) }) test_that("`.unpack` is validated", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { summarise(df, across(x, mean, .unpack = 1)) }) expect_snapshot(error = TRUE, { summarise(df, across(x, mean, .unpack = c("x", "y"))) }) expect_snapshot(error = TRUE, { summarise(df, across(x, mean, .unpack = NA)) }) }) test_that("across() result locations are aligned with column names (#4967)", { df <- tibble(x = 1:2, y = c("a", "b")) expect <- tibble(x_cls = "integer", x_type = TRUE, y_cls = "character", y_type = FALSE) x <- summarise(df, across(everything(), list(cls = class, type = is.numeric))) expect_identical(x, expect) }) test_that("across() works sequentially (#4907)", { df <- tibble(a = 1) expect_equal( mutate(df, x = ncol(across(where(is.numeric), identity)), y = ncol(across(where(is.numeric), identity))), tibble(a = 1, x = 1L, y = 
2L) ) expect_equal( mutate(df, a = "x", y = ncol(across(where(is.numeric), identity))), tibble(a = "x", y = 0L) ) expect_equal( mutate(df, x = 1, y = ncol(across(where(is.numeric), identity))), tibble(a = 1, x = 1, y = 2L) ) }) test_that("across() retains original ordering", { df <- tibble(a = 1, b = 2) expect_named(mutate(df, a = 2, x = across(everything(), identity))$x, c("a", "b")) }) test_that("across() throws meaningful error with failure during expansion (#6534)", { df <- tibble(g = 1, x = 1, y = 2, z = 3) gdf <- group_by(df, g) fn <- function() { stop("oh no!") } # Ends up failing inside the `fn()` call, which gets evaluated # during `across()` expansion but outside any group context expect_snapshot(error = TRUE, { summarise(df, across(everything(), fn())) }) expect_snapshot(error = TRUE, { summarise(df, across(everything(), fn()), .by = g) }) expect_snapshot(error = TRUE, { summarise(gdf, across(everything(), fn())) }) }) test_that("across() gives meaningful messages", { expect_snapshot({ # expanding (expect_error( tibble(x = 1) %>% summarise(across(where(is.numeric), 42)) )) (expect_error( tibble(x = 1) %>% summarise(across(y, mean)) )) # computing (expect_error( tibble(x = 1) %>% summarise(res = across(where(is.numeric), 42)) )) (expect_error( tibble(x = 1) %>% summarise(z = across(y, mean)) )) (expect_error( tibble(x = 1) %>% summarise(res = sum(if_any(where(is.numeric), 42))) )) (expect_error( tibble(x = 1) %>% summarise(res = sum(if_all(~mean(.x)))) )) (expect_error( tibble(x = 1) %>% summarise(res = sum(if_any(~mean(.x)))) )) (expect_error(across())) (expect_error(c_across())) # problem while computing error_fn <- function(.) { if (all(. 
> 10)) { rlang::abort("too small", call = call("error_fn")) } else { 42 } } (expect_error( # expanding tibble(x = 1:10, y = 11:20) %>% summarise(across(everything(), error_fn)) )) (expect_error( # expanding tibble(x = 1:10, y = 11:20) %>% mutate(across(everything(), error_fn)) )) (expect_error( # evaluating tibble(x = 1:10, y = 11:20) %>% summarise(force(across(everything(), error_fn))) )) (expect_error( # evaluating tibble(x = 1:10, y = 11:20) %>% mutate(force(across(everything(), error_fn))) )) # name issue (expect_error( tibble(x = 1) %>% summarise(across(everything(), list(f = mean, f = mean))) )) }) }) test_that("monitoring cache - across() can be used twice in the same expression", { df <- tibble(a = 1, b = 2) expect_equal( mutate(df, x = ncol(across(where(is.numeric), identity)) + ncol(across(a, identity))), tibble(a = 1, b = 2, x = 3) ) }) test_that("monitoring cache - across() can be used in separate expressions", { df <- tibble(a = 1, b = 2) expect_equal( mutate(df, x = ncol(across(where(is.numeric), identity)), y = ncol(across(a, identity))), tibble(a = 1, b = 2, x = 2, y = 1) ) }) test_that("monitoring cache - across() usage can depend on the group id", { df <- tibble(g = 1:2, a = 1:2, b = 3:4) df <- group_by(df, g) switcher <- function() { if_else(cur_group_id() == 1L, across(a, identity)$a, across(b, identity)$b) } expect <- df expect$x <- c(1L, 4L) expect_equal( mutate(df, x = switcher()), expect ) }) test_that("monitoring cache - across() internal cache key depends on all inputs", { df <- tibble(g = rep(1:2, each = 2), a = 1:4) df <- group_by(df, g) expect_identical( mutate(df, tibble(x = across(where(is.numeric), mean)$a, y = across(where(is.numeric), max)$a)), mutate(df, x = mean(a), y = max(a)) ) }) test_that("across() rejects non vectors", { expect_error( data.frame(x = 1) %>% summarise(across(everything(), ~sym("foo"))) ) }) test_that("across() uses tidy recycling rules", { expect_equal( data.frame(x = 1, y = 2) %>% reframe(across(everything(), 
~rep(42, .))), data.frame(x = rep(42, 2), y = rep(42, 2)) ) expect_error( data.frame(x = 2, y = 3) %>% reframe(across(everything(), ~rep(42, .))) ) }) test_that("across() returns a data frame with 1 row (#5204)", { df <- tibble(x = 1:42) expect_equal( mutate(df, across(c(), as.factor)), df ) expect_equal( mutate(df, y = across(c(), as.factor))$y, tibble::new_tibble(list(), nrow = 42) ) mutate(df, { res <- across(c(), as.factor) expect_equal(nrow(res), 1L) res }) }) test_that("across(.names=) can use local variables in addition to {col} and {fn}", { res <- local({ prefix <- "MEAN" data.frame(x = 42) %>% summarise(across(everything(), mean, .names = "{prefix}_{.col}")) }) expect_identical(res, data.frame(MEAN_x = 42)) }) test_that("across(.unpack=) can use local variables in addition to {outer} and {inner}", { fn <- function(x) { tibble(x = x, y = x + 1) } res <- local({ prefix <- "FN" data.frame(col1 = 42, col2 = 24) %>% summarise(across(everything(), fn, .unpack = "{prefix}_{outer}_{inner}")) }) expect_identical( res, data.frame( FN_col1_x = 42, FN_col1_y = 43, FN_col2_x = 24, FN_col2_y = 25 ) ) }) test_that("across() uses environment from the current quosure (#5460)", { # If the data frame `y` is selected, causes a subscript conversion # error since it is fractional df <- data.frame(x = 1, y = 2.4) y <- "x" expect_equal(df %>% summarise(across(all_of(y), mean)), data.frame(x = 1)) expect_equal(df %>% mutate(across(all_of(y), mean)), df) expect_equal(df %>% filter(if_all(all_of(y), ~ .x < 2)), df) # Inherited case expect_error(df %>% summarise(local(across(all_of(y), mean)))) expect_equal( df %>% summarise(summarise(pick(everything()), across(all_of(y), mean))), df %>% summarise(across(all_of(y), mean)) ) }) test_that("across() sees columns in the recursive case (#5498)", { skip_if_not_installed("purrr") df <- tibble( vars = list("foo"), data = list(data.frame(foo = 1, bar = 2)) ) out <- df %>% mutate(data = purrr::map2(data, vars, ~ { .x %>% 
mutate(across(all_of(.y), ~ NA)) })) exp <- tibble( vars = list("foo"), data = list(data.frame(foo = NA, bar = 2)) ) expect_identical(out, exp) out <- df %>% mutate(data = purrr::map2(data, vars, ~ { local({ .y <- "bar" .x %>% mutate(across(all_of(.y), ~ NA)) }) })) exp <- tibble( vars = list("foo"), data = list(data.frame(foo = 1, bar = NA)) ) expect_identical(out, exp) }) test_that("across() works with empty data frames (#5523)", { expect_equal( mutate(tibble(), across(everything(), identity)), tibble() ) }) test_that("lambdas in mutate() + across() can use columns", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(across(everything(), ~ .x / y)) ) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(+across(everything(), ~ .x / y)) ) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(across(everything(), ~ .x / .data$y)) ) expect_identical( df %>% mutate(data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(+across(everything(), ~ .x / .data$y)) ) }) test_that("lambdas in summarise() + across() can use columns", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(across(everything(), ~ .x / y)) ) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(+across(everything(), ~ .x / y)) ) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(across(everything(), ~ .x / .data$y)) ) expect_identical( df %>% summarise(data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(+across(everything(), ~ .x / .data$y)) ) }) test_that("lambdas in mutate() + across() can use columns in follow up expressions (#5717)", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% 
mutate(a = 2, across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(a = 2, +across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(a = 2, across(c(x, y, z), ~ .x / .data$y)) ) expect_identical( df %>% mutate(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% mutate(a = 2, +across(c(x, y, z), ~ .x / .data$y)) ) }) test_that("lambdas in summarise() + across() can use columns in follow up expressions (#5717)", { df <- tibble(x = 2, y = 4, z = 8) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, +across(c(x, y, z), ~ .x / y)) ) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, across(c(x, y, z), ~ .x / .data$y)) ) expect_identical( df %>% summarise(a = 2, data.frame(x = x / y, y = y / y, z = z / y)), df %>% summarise(a = 2, +across(c(x, y, z), ~ .x / .data$y)) ) }) test_that("functions defined inline can use columns (#5734)", { df <- data.frame(x = 1, y = 2) expect_equal( df %>% mutate(across('x', function(.x) .x / y)) %>% pull(x), 0.5 ) }) test_that("if_any() and if_all() do not enforce logical", { # We used to coerce to logical using vctrs. Now we use base # semantics because we expand `if_all(x:y)` to `x & y`. 
d <- data.frame(x = 10, y = 10) expect_equal(filter(d, if_all(x:y, identity)), d) expect_equal(filter(d, if_any(x:y, identity)), d) expect_equal( mutate(d, ok = if_any(x:y, identity)), mutate(d, ok = TRUE) ) expect_equal( mutate(d, ok = if_all(x:y, identity)), mutate(d, ok = TRUE) ) }) test_that("if_any() and if_all() can be used in mutate() (#5709)", { d <- data.frame(x = c(1, 5, 10, 10), y = c(0, 0, 0, 10), z = c(10, 5, 1, 10)) res <- d %>% mutate( any = if_any(x:z, ~ . > 8), all = if_all(x:z, ~ . > 8) ) expect_equal(res$any, c(TRUE, FALSE, TRUE, TRUE)) expect_equal(res$all, c(FALSE, FALSE, FALSE, TRUE)) }) test_that("across() caching not confused when used from if_any() and if_all() (#5782)", { res <- data.frame(x = 1:3) %>% mutate( any = if_any(x, ~ . >= 2) + if_any(x, ~ . >= 3), all = if_all(x, ~ . >= 2) + if_all(x, ~ . >= 3) ) expect_equal(res$any, c(0, 1, 2)) expect_equal(res$all, c(0, 1, 2)) }) test_that("if_any() and if_all() respect filter()-like NA handling", { df <- expand.grid( x = c(TRUE, FALSE, NA), y = c(TRUE, FALSE, NA) ) expect_identical( filter(df, x & y), filter(df, if_all(c(x,y), identity)) ) expect_identical( filter(df, x | y), filter(df, if_any(c(x,y), identity)) ) }) test_that("if_any() and if_all() aborts when predicate mistakingly used in .cols= (#5732)", { df <- data.frame(x = 1:10, y = 1:10) expect_snapshot({ # expanded case (expect_error(filter(df, if_any(~ .x > 5)))) (expect_error(filter(df, if_all(~ .x > 5)))) # non expanded case (expect_error(filter(df, !if_any(~ .x > 5)))) (expect_error(filter(df, !if_all(~ .x > 5)))) }) }) test_that("across() correctly reset column", { expect_error(cur_column()) res <- data.frame(x = 1) %>% summarise( a = { expect_error(cur_column()); 2}, across(x, ~{ expect_equal(cur_column(), "x"); 3}, .names = "b"), # top_across() c = { expect_error(cur_column()); 4}, force(across(x, ~{ expect_equal(cur_column(), "x"); 5}, .names = "d")), # across() e = { expect_error(cur_column()); 6} ) expect_equal(res, 
data.frame(a = 2, b = 3, c = 4, d = 5, e = 6)) expect_error(cur_column()) res <- data.frame(x = 1) %>% mutate( a = { expect_error(cur_column()); 2}, across(x, ~{ expect_equal(cur_column(), "x"); 3}, .names = "b"), # top_across() c = { expect_error(cur_column()); 4}, force(across(x, ~{ expect_equal(cur_column(), "x"); 5}, .names = "d")), # across() e = { expect_error(cur_column()); 6} ) expect_equal(res, data.frame(x = 1, a = 2, b = 3, c = 4, d = 5, e = 6)) expect_error(cur_column()) }) test_that("across() can omit dots", { df <- tibble(x = tibble(foo = 1), y = tibble(foo = 2)) # top res <- mutate(df, across( everything(), list )) expect_equal(res$x[[1]]$foo, 1) expect_equal(res$y[[1]]$foo, 2) # not top res <- mutate(df, force(across( everything(), list ))) expect_equal(res$x[[1]]$foo, 1) expect_equal(res$y[[1]]$foo, 2) }) test_that("group variables are in scope (#5832)", { f <- function(x, z) x + z gdf <- data.frame(x = 1:2, y = 3:4, g = 1:2) %>% group_by(g) exp <- gdf %>% summarise(x = f(x, z = y)) expect_equal( gdf %>% summarise(across(x, ~ f(.x, z = y))), exp ) expect_equal( gdf %>% summarise(across(x, ~ f(.x, z = y))), exp ) }) test_that("can pass quosure through `across()`", { summarise_mean <- function(data, vars) { data %>% summarise(across({{ vars }}, mean)) } gdf <- data.frame(g = c(1, 1, 2), x = 1:3) %>% group_by(g) expect_equal( gdf %>% summarise_mean(where(is.numeric)), summarise(gdf, x = mean(x)) ) }) test_that("across() inlines formulas", { # Env of captured quosure passed to `as_across_fn_call()`. The # unevaluated lambdas should inherit from that env after inlining. 
env <- env() lambda <- quo_eval_fns(quo(function(x) fn(x)), mask = env) out <- as_across_fn_call(lambda, quote(var), env, env) expect_equal(out, new_quosure(quote(fn(var)), env)) formula <- quo_eval_fns(quo(~ fn(.x)), mask = env) out <- as_across_fn_call(formula, quote(var), env, env) expect_equal(out, new_quosure(quote(fn(var)), env)) # Evaluated formulas preserve their own env f <- local(~ fn(.x)) fn <- quo_eval_fns(quo(!!f), mask = env) out <- as_across_fn_call(fn, quote(var), env, env) expect_equal(get_env(f), get_env(fn)) expect_equal(out, new_quosure(call2(fn, quote(var)), env)) # Inlining is disabled for complex lambda calls fn <- quo_eval_fns(quo(function(x, y) x), mask = env) out <- as_across_fn_call(fn, quote(var), env, env) expect_equal(out, new_quosure(call2(fn, quote(var)), env)) # Formulas are converted to functions expect_rlang_lambda <- function(fn) { expect_s3_class(fn, "rlang_lambda_function") out <- as_across_fn_call(fn, quote(var), env, env) expect_equal(out, new_quosure(call2(fn, quote(var)), env)) } out <- quo_eval_fns(quo(~ .y), mask = env) expect_rlang_lambda(out) out <- quo_eval_fns(quo(list(~ .y)), mask = env) expect_type(out, "list") map(out, expect_rlang_lambda) # All formula-lambda arguments are interpolated fn <- quo_eval_fns(quo(~ list(.x, ., .x)), mask = env) out <- as_across_fn_call(fn, quote(var), env, env) expect_equal( out, new_quosure(quote(list(var, var, var)), f_env(f)) ) }) test_that("inlined and non inlined lambdas work", { df <- data.frame(foo = 1:2, bar = 100:101) exp <- data.frame(foo = c(101.5, 102.5), bar = c(200.5, 201.5)) expect_equal(df %>% mutate(across(1:2, function(x) x + mean(bar))), exp) expect_equal(df %>% mutate((across(1:2, function(x) x + mean(bar)))), exp) expect_equal(df %>% mutate(across(1:2, ~ .x + mean(bar))), exp) expect_equal(df %>% mutate((across(1:2, ~ .x + mean(bar)))), exp) expect_equal(df %>% mutate(across(1:2, ~ ..1 + mean(bar))), exp) expect_equal(df %>% mutate((across(1:2, ~ ..1 + 
    mean(bar)))), exp)

  # Message generated by base R changed
  skip_if_not_installed("base", "3.6.0")
  expect_snapshot({
    (expect_error(df %>% mutate(across(1:2, ~ .y + mean(bar)))))
    (expect_error(df %>% mutate((across(1:2, ~ .y + mean(bar))))))
  })
})

test_that("list of lambdas work", {
  df <- data.frame(foo = 1:2, bar = 100:101)
  exp <- cbind(
    df,
    data.frame(foo_1 = c(101.5, 102.5), bar_1 = c(200.5, 201.5))
  )

  expect_equal(df %>% mutate(across(1:2, list(function(x) x + mean(bar)))), exp)
  expect_equal(df %>% mutate((across(1:2, list(function(x) x + mean(bar))))), exp)

  expect_equal(df %>% mutate(across(1:2, list(~ .x + mean(bar)))), exp)
  expect_equal(df %>% mutate((across(1:2, list(~ .x + mean(bar))))), exp)
})

test_that("anonymous function `.fns` can access the `.data` pronoun even when not inlined", {
  df <- tibble(x = 1:2, y = 3:4)

  # Can't access it here, `fn()`'s environment doesn't know about `.data`
  fn <- function(col) {
    .data[["x"]]
  }
  expect_snapshot(error = TRUE, {
    mutate(df, across(y, fn))
  })

  # Can access it with inlinable quosures
  out <- mutate(df, across(y, function(col) {
    .data[["x"]]
  }))
  expect_identical(out$y, out$x)

  # Can access it with non-inlinable quosures
  out <- mutate(df, across(y, function(col) {
    return(.data[["x"]])
  }))
  expect_identical(out$y, out$x)
})

test_that("across() uses local formula environment (#5881)", {
  f <- local({
    prefix <- "foo"
    ~ paste(prefix, .x)
  })
  df <- tibble(x = "x")

  expect_equal(
    mutate(df, across(x, f)),
    tibble(x = "foo x")
  )
  expect_equal(
    mutate(df, across(x, list(f = f))),
    tibble(x = "x", x_f = "foo x")
  )

  local({
    # local() here is not necessary, it's just in case the
    # code is run directly without the test_that()
    prefix <- "foo"
    expect_equal(
      mutate(df, across(x, ~paste(prefix, .x))),
      tibble(x = "foo x")
    )
    expect_equal(
      mutate(df, across(x, list(f = ~paste(prefix, .x)))),
      tibble(x = "x", x_f = "foo x")
    )
  })

  expect_equal(
    data.frame(x = 1) %>% mutate(across(1, list(f = local(~ .
      + 1)))),
    data.frame(x = 1, x_f = 2)
  )
  expect_equal(
    data.frame(x = 1) %>% mutate(across(1, local({
      `_local_var` <- 1
      ~ . + `_local_var`
    }))),
    data.frame(x = 2)
  )
})

test_that("unevaluated formulas (currently) fail", {
  df <- tibble(x = "x")
  expect_error(
    mutate(df, across(x, quote(~ paste("foo", .x))))
  )
})

test_that("across() can access lexical scope (#5862)", {
  f_across <- function(data, cols, fn) {
    data %>% summarise(
      across({{ cols }}, fn)
    )
  }
  df <- data.frame(x = 1:10, y = 1:10)
  expect_equal(
    f_across(df, c(x, y), mean),
    summarise(df, across(c(x, y), mean))
  )
})

test_that("across() allows renaming in `.cols` (#6895)", {
  df <- tibble(x = 1, y = 2, z = 3)
  cols <- set_names(c("x", "y"), c("a", "b"))

  expect_identical(
    mutate(df, across(all_of(cols), identity)),
    mutate(df, a = x, b = y)
  )
  expect_identical(
    mutate(df, (across(all_of(cols), identity))),
    mutate(df, a = x, b = y)
  )
  expect_identical(
    mutate(df, across(all_of(cols), identity, .names = "{.col}_name")),
    mutate(df, a_name = x, b_name = y)
  )
  expect_identical(
    mutate(df, (across(all_of(cols), identity, .names = "{.col}_name"))),
    mutate(df, a_name = x, b_name = y)
  )
})

test_that("if_any() and if_all() expansions deal with no inputs or single inputs", {
  d <- data.frame(x = 1)

  # No inputs
  expect_equal(
    filter(d, if_any(starts_with("c"), ~ FALSE)),
    filter(d)
  )
  expect_equal(
    filter(d, if_all(starts_with("c"), ~ FALSE)),
    filter(d)
  )

  # Single inputs
  expect_equal(
    filter(d, if_any(x, ~ FALSE)),
    filter(d, FALSE)
  )
  expect_equal(
    filter(d, if_all(x, ~ FALSE)),
    filter(d, FALSE)
  )
})

test_that("if_any() and if_all() wrapped deal with no inputs or single inputs", {
  d <- data.frame(x = 1)

  # No inputs
  expect_equal(
    filter(d, (if_any(starts_with("c"), ~ FALSE))),
    filter(d)
  )
  expect_equal(
    filter(d, (if_all(starts_with("c"), ~ FALSE))),
    filter(d)
  )

  # Single inputs
  expect_equal(
    filter(d, (if_any(x, ~ FALSE))),
    filter(d, FALSE)
  )
  expect_equal(
    filter(d, (if_all(x, ~ FALSE))),
    filter(d, FALSE)
  )
})

test_that("expanded if_any() finds local data", {
  limit <- 7
  df <- data.frame(x = 1:10, y = 10:1)
  expect_identical(
    filter(df, if_any(everything(), ~ .x > limit)),
    filter(df, x > limit | y > limit)
  )
})

test_that("across() can use named selections", {
  df <- data.frame(x = 1, y = 2)

  # no fns
  expect_equal(
    df %>% summarise(across(c(a = x, b = y))),
    data.frame(a = 1, b = 2)
  )
  expect_equal(
    df %>% summarise(across(all_of(c(a = "x", b = "y")))),
    data.frame(a = 1, b = 2)
  )

  # no fns, non expanded
  expect_equal(
    df %>% summarise((across(c(a = x, b = y)))),
    data.frame(a = 1, b = 2)
  )
  expect_equal(
    df %>% summarise((across(all_of(c(a = "x", b = "y"))))),
    data.frame(a = 1, b = 2)
  )

  # one fn
  expect_equal(
    df %>% summarise(across(c(a = x, b = y), mean)),
    data.frame(a = 1, b = 2)
  )
  expect_equal(
    df %>% summarise(across(all_of(c(a = "x", b = "y")), mean)),
    data.frame(a = 1, b = 2)
  )

  # one fn - non expanded
  expect_equal(
    df %>% summarise((across(c(a = x, b = y), mean))),
    data.frame(a = 1, b = 2)
  )
  expect_equal(
    df %>% summarise((across(all_of(c(a = "x", b = "y")), mean))),
    data.frame(a = 1, b = 2)
  )

  # multiple fns
  expect_equal(
    df %>% summarise(across(c(a = x, b = y), list(mean = mean, sum = sum))),
    data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2)
  )
  expect_equal(
    df %>% summarise(across(all_of(c(a = "x", b = "y")), list(mean = mean, sum = sum))),
    data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2)
  )

  # multiple fns - non expanded
  expect_equal(
    df %>% summarise((across(c(a = x, b = y), list(mean = mean, sum = sum)))),
    data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2)
  )
  expect_equal(
    df %>% summarise((across(all_of(c(a = "x", b = "y")), list(mean = mean, sum = sum)))),
    data.frame(a_mean = 1, a_sum = 1, b_mean = 2, b_sum = 2)
  )
})

test_that("expr_subtitute() stops at lambdas (#5896)", {
  # NOTE(review): "subtitute" in the test name is a typo for "substitute";
  # left as-is since testthat descriptions are recorded strings
  expect_identical(
    expr_substitute(expr(map(.x, ~mean(.x))), quote(.x), quote(a)),
    expr(map(a, ~mean(.x)))
  )
  expect_identical(
    expr_substitute(expr(map(.x, function(.x) mean(.x))), quote(.x), quote(a)),
    expr(map(a, function(.x) mean(.x)))
  )
})

test_that("expr_subtitute() keeps at double-sided formula (#5894)", {
  expect_identical(
    expr_substitute(expr(case_when(.x < 5 ~ 5, .default = .x)), quote(.x), quote(a)),
    expr(case_when(a < 5 ~ 5, .default = a))
  )
  expect_identical(
    expr_substitute(expr(case_when(. < 5 ~ 5, .default = .)), quote(.), quote(a)),
    expr(case_when(a < 5 ~ 5, .default = a))
  )
})

test_that("across() predicates operate on whole data", {
  df <- tibble(
    x = c(1, 1, 2),
    g = c(1, 1, 2)
  )

  # Ungrouped: `where()` sees all rows, so both columns match
  out <- df %>% mutate(across(where(~ n_distinct(.x) > 1), ~ .x + 10))
  exp <- tibble(
    x = c(11, 11, 12),
    g = c(11, 11, 12)
  )
  expect_equal(out, exp)

  # Grouped: the predicate still runs on the whole data, but the grouping
  # column `g` is excluded from modification
  out <- df %>% group_by(g) %>% mutate(across(where(~ n_distinct(.x) > 1), ~ .x + 10))
  exp <- tibble(
    x = c(11, 11, 12),
    g = c(1, 1, 2)
  ) %>% group_by(g)
  expect_equal(out, exp)
})

test_that("expand_across() expands lambdas", {
  # Internal API: builds a dplyr quosure and a DataMask by hand to test
  # the macro-expansion of `across()` into per-column expressions
  quo <- quo(across(c(cyl, am), ~ identity(.x)))
  quo <- new_dplyr_quosure(
    quo,
    name = quo,
    is_named = FALSE,
    index = 1
  )
  by <- compute_by(by = NULL, data = mtcars, error_call = call("caller"))
  DataMask$new(mtcars, by, "mutate", call("caller"))

  expect_equal(
    map(expand_across(quo), quo_get_expr),
    exprs(
      cyl = identity(cyl),
      am = identity(am)
    )
  )
})

test_that("expand_if_across() expands lambdas", {
  quo <- quo(if_any(c(cyl, am), ~ . > 4))
  quo <- new_dplyr_quosure(
    quo,
    name = quo,
    is_named = FALSE,
    index = 1
  )
  by <- compute_by(by = NULL, data = mtcars, error_call = call("caller"))
  DataMask$new(mtcars, by, "mutate", call("caller"))

  # `if_any()` expands to a single `|` reduction over the per-column conditions
  expect_equal(
    map(expand_if_across(quo), quo_squash),
    alist(`|`(cyl > 4, am > 4))
  )
})

test_that("rowwise() preserves list-cols iff no `.fns` (#5951, #6264)", {
  # TODO: Deprecate this behavior in favor of `pick()`, which doesn't preserve
  # list-cols but is well-defined as pure macro expansion.
  rf <- rowwise(tibble(x = list(1:2, 3:5)))

  # Need to unchop so works like mutate(rf, x = length(x))
  out <- mutate(rf, across(everything(), length))
  expect_equal(out$x, c(2, 3))

  # Need to preserve to create valid data frame
  out <- mutate(rf, across = list(across(everything())))
  expect_equal(out$across, list(
    tibble(x = list(1:2)),
    tibble(x = list(3:5))
  ))
})

# c_across ----------------------------------------------------------------

test_that("selects and combines columns", {
  df <- data.frame(x = 1:2, y = 3:4)
  out <- df %>% summarise(z = list(c_across(x:y)))
  expect_equal(out$z, list(1:4))
})

test_that("can't rename during selection (#6522)", {
  df <- tibble(x = 1)
  expect_snapshot(error = TRUE, {
    mutate(df, z = c_across(c(y = x)))
  })
})

test_that("can't explicitly select grouping columns (#6522)", {
  # Related to removing the mask layer from the quosure environments
  df <- tibble(g = 1, x = 2)
  gdf <- group_by(df, g)
  expect_snapshot(error = TRUE, {
    mutate(gdf, y = c_across(g))
  })
})

test_that("`all_of()` is evaluated in the correct environment (#6522)", {
  # Related to removing the mask layer from the quosure environments
  df <- tibble(x = 1, y = 2)

  # We expect an "object not found" error, but we don't control that
  # so we aren't going to snapshot it, especially since the call reported
  # by those kinds of errors changed in R 4.3.
  # No local `y` exists yet, so `all_of(y)` must fail to evaluate
  expect_error(mutate(df, z = c_across(all_of(y))))

  y <- "x"
  expect <- df[["x"]]
  out <- mutate(df, z = c_across(all_of(y)))
  expect_identical(out$z, expect)
})

# cols deprecation --------------------------------------------------------

test_that("across() applies old `.cols = everything()` default with a warning", {
  local_options(lifecycle_verbosity = "warning")

  df <- tibble(g = c(1, 2), x = c(1, 2), y = c(3, 4))
  gdf <- group_by(df, g)
  times_two <- function(x) x * 2

  # Expansion path
  expect_snapshot(out <- mutate(df, across(.fns = times_two)))
  expect_identical(out$g, df$g * 2)
  expect_identical(out$x, df$x * 2)
  expect_identical(out$y, df$y * 2)

  expect_snapshot(out <- mutate(gdf, across(.fns = times_two)))
  expect_identical(out$g, df$g)
  expect_identical(out$x, df$x * 2)
  expect_identical(out$y, df$y * 2)

  # Evaluation path
  expect_snapshot(out <- mutate(df, (across(.fns = times_two))))
  expect_identical(out$g, df$g * 2)
  expect_identical(out$x, df$x * 2)
  expect_identical(out$y, df$y * 2)

  expect_snapshot(out <- mutate(gdf, (across(.fns = times_two))))
  expect_identical(out$g, df$g)
  expect_identical(out$x, df$x * 2)
  expect_identical(out$y, df$y * 2)
})

test_that("if_any() and if_all() apply old `.cols = everything()` default with a warning", {
  local_options(lifecycle_verbosity = "warning")

  df <- tibble(x = c(TRUE, FALSE, TRUE), y = c(FALSE, FALSE, TRUE))
  gdf <- mutate(df, g = c(1, 1, 2), .before = 1)
  gdf <- group_by(gdf, g)

  # Expansion path
  expect_snapshot(out <- filter(df, if_any()))
  expect_identical(out, df[c(1, 3),])
  expect_snapshot(out <- filter(gdf, if_any()))
  expect_identical(out, gdf[c(1, 3),])
  expect_snapshot(out <- filter(df, if_all()))
  expect_identical(out, df[3,])
  expect_snapshot(out <- filter(gdf, if_all()))
  expect_identical(out, gdf[3,])

  # Evaluation path
  expect_snapshot(out <- filter(df, (if_any())))
  expect_identical(out, df[c(1, 3),])
  expect_snapshot(out <- filter(gdf, (if_any())))
  expect_identical(out, gdf[c(1, 3),])
  expect_snapshot(out <- filter(df, (if_all())))
  expect_identical(out, df[3,])
  expect_snapshot(out <- filter(gdf, (if_all())))
  expect_identical(out, gdf[3,])
})

test_that("c_across() applies old `cols = everything()` default with a warning", {
  local_options(lifecycle_verbosity = "warning")

  df <- tibble(x = c(1, 3), y = c(2, 4))
  df <- rowwise(df)

  # Will see 2 warnings because verbosity option forces it to warn every time
  expect_snapshot(out <- mutate(df, z = sum(c_across())))
  expect_identical(out$z, c(3, 7))
})

# fns deprecation ---------------------------------------------------------

test_that("across() applies old `.fns = NULL` default", {
  df <- tibble(x = 1, y = 2)

  # Expansion path
  out <- mutate(df, z = across(everything()))
  expect_identical(out$z, df)

  # Evaluation path
  out <- mutate(df, z = (across(everything())))
  expect_identical(out$z, df)
})

test_that("if_any() and if_all() apply old `.fns = NULL` default", {
  df <- tibble(x = c(TRUE, FALSE, TRUE), y = c(FALSE, FALSE, TRUE))

  # Expansion path
  expect_identical(filter(df, if_any(everything())), df[c(1, 3),])
  expect_identical(filter(df, if_all(everything())), df[3,])

  # Evaluation path
  expect_identical(filter(df, (if_any(everything()))), df[c(1, 3),])
  expect_identical(filter(df, (if_all(everything()))), df[3,])
})

test_that("across errors with non-empty dots and no `.fns` supplied (#6638)", {
  df <- tibble(x = 1)
  # `.funs` is not an `across()` argument; with no `.fns` it lands in `...`
  expect_snapshot(
    error = TRUE,
    mutate(df, across(x, .funs = ~ . * 1000))
  )
})

# dots --------------------------------------------------------------------

test_that("across(...) is deprecated", {
  df <- tibble(x = c(1, NA))
  expect_snapshot(summarise(df, across(everything(), mean, na.rm = TRUE)))
})

test_that("across() passes ... to functions", {
  options(lifecycle_verbosity = "quiet")

  df <- tibble(x = c(1, NA))

  expect_equal(
    summarise(df, across(everything(), mean, na.rm = TRUE)),
    tibble(x = 1)
  )

  expect_equal(
    summarise(df, across(everything(), list(mean = mean, median = median), na.rm = TRUE)),
    tibble(x_mean = 1, x_median = 1)
  )
})

test_that("across() passes unnamed arguments following .fns as ... (#4965)", {
  options(lifecycle_verbosity = "quiet")

  df <- tibble(x = 1)
  expect_equal(mutate(df, across(x, `+`, 1)), tibble(x = 2))
})

test_that("across() avoids simple argument name collisions with ... (#4965)", {
  options(lifecycle_verbosity = "quiet")

  df <- tibble(x = c(1, 2))
  expect_equal(summarize(df, across(x, tail, n = 1)), tibble(x = 2))
})

test_that("across() evaluates ... with promise semantics (#5813)", {
  options(lifecycle_verbosity = "quiet")

  df <- tibble(x = tibble(foo = 1), y = tibble(foo = 2))
  res <- mutate(df, across(
    everything(),
    mutate,
    foo = foo + 1
  ))
  expect_equal(res$x$foo, 2)
  expect_equal(res$y$foo, 3)

  # Dots are evaluated only once
  new_counter <- function() {
    n <- 0L
    function() {
      n <<- n + 1L
      n
    }
  }
  counter <- new_counter()
  list_second <- function(...)
  {
    list(..2)
  }
  res <- mutate(df, across(
    everything(),
    list_second,
    counter()
  ))
  # `counter()` in the dots was forced exactly once, so both columns see 1
  expect_equal(res$x[[1]], 1)
  expect_equal(res$y[[1]], 1)
})

test_that("arguments in dots are evaluated once per group", {
  options(lifecycle_verbosity = "quiet")

  set.seed(0)
  out <- data.frame(g = 1:3, var = NA) %>%
    group_by(g) %>%
    mutate(across(var, function(x, y) y, rnorm(1))) %>%
    pull(var)
  set.seed(0)
  # One `rnorm(1)` draw per group -> same stream as `rnorm(3)`
  expect_equal(out, rnorm(3))
})

test_that("group variables are in scope when passed in dots (#5832)", {
  options(lifecycle_verbosity = "quiet")

  f <- function(x, z) x + z
  gdf <- data.frame(x = 1:2, y = 3:4, g = 1:2) %>% group_by(g)
  exp <- gdf %>% summarise(x = f(x, z = y))

  expect_equal(
    gdf %>% summarise(across(x, f, z = y)),
    exp
  )
  expect_equal(
    gdf %>% summarise((across(x, f, z = y))),
    exp
  )
})

test_that("symbols are looked up as list or functions (#6545)", {
  # Column `mean` must not shadow the function `mean` when `.fns` is a symbol
  df <- tibble(mean = 1:5)

  exp <- summarise(df, across(everything(), function(x) mean(x)))
  expect_equal(
    summarise(df, across(everything(), mean)),
    exp
  )
  expect_equal(
    summarise(df, (across(everything(), mean))),
    exp
  )

  exp <- summarise(df, across(everything(), list(function(x) mean(x))))
  expect_equal(
    summarize(df, across(everything(), list(mean))),
    exp
  )
  expect_equal(
    summarize(df, (across(everything(), list(mean)))),
    exp
  )
})

test_that("non-inlinable but maskable lambdas give precedence to function arguments", {
  df <- data.frame(
    foo = 1,
    bar = "a"
  )
  # The `foo` parameter shadows the `foo` column inside the lambda
  out <- mutate(df, across(1:2, function(foo) return(foo)))
  expect_equal(out, df)
})

test_that("maskable lambdas can refer to their lexical environment", {
  foo <- "OK"
  df <- tibble(bar = "a")

  # Non-inlinable
  expect_equal(
    mutate(df, across(1, function(x) return(paste(x, foo)))),
    tibble(bar = "a OK")
  )
  expect_equal(
    mutate(df, across(1, ~ return(paste(.x, foo)))),
    tibble(bar = "a OK")
  )

  # Inlinable
  expect_equal(
    mutate(df, across(1, function(x) paste(x, foo))),
    tibble(bar = "a OK")
  )
  expect_equal(
    mutate(df, across(1, ~ paste(.x, foo))),
    tibble(bar = "a OK")
  )
})
dplyr/tests/testthat/test-na-if.R0000644000176200001440000000402214366556340016512 0ustar liggesuserstest_that("scalar y replaces all matching x", { x <- c(0, 1, 0) expect_identical(na_if(x, 0), c(NA, 1, NA)) expect_identical(na_if(x, 1), c(0, NA, 0)) }) test_that("`y` can be a vector the same length as `x` (matching SQL NULLIF)", { x <- c(0, 1, 0) y <- c(0, 1, 2) expect_identical(na_if(x, y), c(NA, NA, 0)) }) test_that("`NA` replacing itself is a no-op", { expect_identical(na_if(NA, NA), NA) }) test_that("missing values are allowed to equal each other, so `NaN`s can be standardized", { expect_identical(na_if(NaN, NaN), NA_real_) }) test_that("missing values equal each other in partially incomplete data frame rows", { x <- tibble( x = c(2, 1, NA, 1), y = c(1, NA, NA, NA), z = c(3, NaN, NA, NaN) ) y <- tibble(x = 1, y = NA, z = NaN) expect <- vec_assign(x, i = c(2, 4), value = NA) expect_identical(na_if(x, y), expect) }) test_that("works when there are missings in either input", { expect_identical(na_if(c(1, NA, 2), 1), c(NA, NA, 2)) expect_identical(na_if(c(1, NA, 2), c(1, NA, NA)), c(NA, NA, 2)) }) test_that("works with data frames", { x <- tibble(a = c(1, 99, 99, 99), b = c("x", "NA", "bar", "NA")) y <- tibble(a = 99, b = "NA") expect_identical( na_if(x, y), x[c(1, NA, 3, NA),] ) }) test_that("works with rcrd types", { x <- new_rcrd(list(a = c(1, 99, 99, 99), b = c("x", "NA", "bar", "NA"))) y <- new_rcrd(list(a = 99, b = "NA")) expect_identical( na_if(x, y), x[c(1, NA, 3, NA)] ) }) test_that("is type stable on `x`", { expect_identical(na_if(0L, 0), NA_integer_) expect_snapshot(error = TRUE, { na_if(0L, 1.5) }) }) test_that("is size stable on `x`", { expect_snapshot(error = TRUE, { na_if(1, integer()) }) expect_snapshot(error = TRUE, { na_if(1, c(1, 2)) }) expect_snapshot(error = TRUE, { na_if(c(1, 2, 3), c(1, 2)) }) }) test_that("requires vector types for `x` and `y`", { expect_snapshot(error = TRUE, { na_if(environment(), 1) }) expect_snapshot(error = TRUE, { 
na_if(1, environment()) }) }) dplyr/tests/testthat/test-locale.R0000644000176200001440000000055414366556340016765 0ustar liggesuserstest_that("`dplyr_legacy_locale()` is `FALSE` by default", { expect_false(dplyr_legacy_locale()) }) test_that("`dplyr_legacy_locale()` respects `dplyr.legacy_locale`", { local_options(dplyr.legacy_locale = TRUE) expect_true(dplyr_legacy_locale()) local_options(dplyr.legacy_locale = 1) expect_snapshot(error = TRUE, { dplyr_legacy_locale() }) }) dplyr/tests/testthat/test-deprec-dbi.R0000644000176200001440000000033414272553254017515 0ustar liggesuserstest_that("src_sqlite() gives meaningful error messages", { skip_if_not_installed("dbplyr") withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(src_sqlite(":memory:"))) }) }) dplyr/tests/testthat/test-join.R0000644000176200001440000005603714472225345016470 0ustar liggesusers# Basic properties -------------------------------------------------------- test_that("mutating joins preserve row and column order of x", { df1 <- data.frame(a = 1:3) df2 <- data.frame(b = 1, c = 2, a = 4:1) out <- inner_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:3) out <- left_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:3) out <- right_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:4) out <- full_join(df1, df2, by = "a") expect_named(out, c("a", "b", "c")) expect_equal(out$a, 1:4) }) test_that("even when column names change", { df1 <- data.frame(x = c(1, 1, 2, 3), z = 1:4, a = 1) df2 <- data.frame(z = 1:3, b = 1, x = c(1, 2, 4)) out <- inner_join(df1, df2, by = "x") expect_named(out, c("x", "z.x", "a", "z.y", "b")) }) test_that("filtering joins preserve row and column order of x (#2964)", { df1 <- data.frame(a = 4:1, b = 1) df2 <- data.frame(b = 1, c = 2, a = 2:3) out <- semi_join(df1, df2, by = "a") expect_named(out, c("a", "b")) expect_equal(out$a, 3:2) out <- anti_join(df1, 
  # (continuation of the anti_join half of a filtering-join ordering test)
  df2, by = "a")
  expect_named(out, c("a", "b"))
  expect_equal(out$a, c(4L, 1L))
})

test_that("keys are coerced to symmetric type", {
  foo <- tibble(id = 1:2, var1 = "foo")
  bar <- tibble(id = as.numeric(1:2), var2 = "bar")
  # integer/double keys meet at double, regardless of join direction
  expect_type(inner_join(foo, bar, by = "id")$id, "double")
  expect_type(inner_join(bar, foo, by = "id")$id, "double")

  foo <- tibble(id = factor(c("a", "b")), var1 = "foo")
  bar <- tibble(id = c("a", "b"), var2 = "bar")
  # factor/character keys meet at character
  expect_type(inner_join(foo, bar, by = "id")$id, "character")
  expect_type(inner_join(bar, foo, by = "id")$id, "character")
})

test_that("factor keys are coerced to the union factor type", {
  df1 <- tibble(x = 1, y = factor("a"))
  df2 <- tibble(x = 2, y = factor("b"))
  out <- full_join(df1, df2, by = c("x", "y"))
  expect_equal(out$y, factor(c("a", "b")))
})

test_that("keys of non-equi conditions are not coerced if `keep = NULL`", {
  foo <- tibble(id = factor(c("a", "b")), col1 = c(1, 2), var1 = "foo")
  bar <- tibble(id = c("a", "b"), col2 = c(1L, 2L), var2 = "bar")

  out <- inner_join(foo, bar, by = join_by(id, col1 >= col2))
  expect_type(out$id, "character")
  expect_type(out$col1, "double")
  expect_type(out$col2, "integer")

  out <- inner_join(bar, foo, by = join_by(id, col2 <= col1))
  expect_type(out$id, "character")
  expect_type(out$col1, "double")
  expect_type(out$col2, "integer")
})

test_that("when keep = TRUE, left_join() preserves both sets of keys", {
  # when keys have different names
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(x = c(3, 4), y = c(3, 4))
  out <- left_join(df1, df2, by = c("a" = "x"), keep = TRUE)
  expect_equal(out$a, c(2, 3))
  expect_equal(out$x, c(NA, 3))

  # when keys have same name
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(a = c(3, 4), y = c(3, 4))
  out <- left_join(df1, df2, by = c("a"), keep = TRUE)
  expect_equal(out$a.x, c(2, 3))
  expect_equal(out$a.y, c(NA, 3))
})

test_that("when keep = TRUE, right_join() preserves both sets of keys", {
  # when keys have different names
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(x = c(3, 4), y = c(3, 4))
  out <- right_join(df1, df2, by = c("a" = "x"), keep = TRUE)
  expect_equal(out$a, c(3, NA))
  expect_equal(out$x, c(3, 4))

  # when keys have same name
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(a = c(3, 4), y = c(3, 4))
  out <- right_join(df1, df2, by = c("a"), keep = TRUE)
  expect_equal(out$a.x, c(3, NA))
  expect_equal(out$a.y, c(3, 4))
})

test_that("when keep = TRUE, full_join() preserves both sets of keys", {
  # when keys have different names
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(x = c(3, 4), y = c(3, 4))
  out <- full_join(df1, df2, by = c("a" = "x"), keep = TRUE)
  expect_equal(out$a, c(2, 3, NA))
  expect_equal(out$x, c(NA, 3, 4))

  # when keys have same name
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(a = c(3, 4), y = c(3, 4))
  out <- full_join(df1, df2, by = c("a"), keep = TRUE)
  expect_equal(out$a.x, c(2, 3, NA))
  expect_equal(out$a.y, c(NA, 3, 4))
})

test_that("when keep = TRUE, inner_join() preserves both sets of keys (#5581)", {
  # when keys have different names
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(x = c(3, 4), y = c(3, 4))
  out <- inner_join(df1, df2, by = c("a" = "x"), keep = TRUE)
  expect_equal(out$a, c(3))
  expect_equal(out$x, c(3))

  # when keys have same name
  df1 <- tibble(a = c(2, 3), b = c(1, 2))
  df2 <- tibble(a = c(3, 4), y = c(3, 4))
  out <- inner_join(df1, df2, by = c("a"), keep = TRUE)
  expect_equal(out$a.x, c(3))
  expect_equal(out$a.y, c(3))
})

test_that("can't use `keep = FALSE` with non-equi conditions (#6499)", {
  df1 <- tibble(xl = c(1, 3), xu = c(4, 7))
  df2 <- tibble(yl = c(2, 5, 8), yu = c(6, 8, 9))

  expect_snapshot(error = TRUE, {
    left_join(df1, df2, join_by(overlaps(xl, xu, yl, yu)), keep = FALSE)
  })

  # Would never make sense here.
  # Based on how the binary conditions are generated we'd merge:
  # - `yu` into `xl`
  # - `yl` into `xu`
  # Which results in `xl` and `xu` columns that don't maintain `xl <= xu`.
  expect_snapshot(error = TRUE, {
    full_join(df1, df2, join_by(overlaps(xl, xu, yl, yu)), keep = FALSE)
  })
})

test_that("joins matches NAs by default (#892, #2033)", {
  df1 <- tibble(x = c(NA_character_, 1))
  df2 <- tibble(x = c(NA_character_, 2))

  expect_equal(nrow(inner_join(df1, df2, by = "x")), 1)
  expect_equal(nrow(semi_join(df1, df2, by = "x")), 1)
})

test_that("joins don't match NA when na_matches = 'never' (#2033)", {
  df1 <- tibble(a = c(1, NA))
  df2 <- tibble(a = c(1, NA), b = 1:2)

  out <- left_join(df1, df2, by = "a", na_matches = "never")
  expect_equal(out, tibble(a = c(1, NA), b = c(1, NA)))

  out <- inner_join(df1, df2, by = "a", na_matches = "never")
  expect_equal(out, tibble(a = 1, b = 1))

  out <- semi_join(df1, df2, by = "a", na_matches = "never")
  expect_equal(out, tibble(a = 1))

  out <- anti_join(df1, df2, by = "a", na_matches = "never")
  expect_equal(out, tibble(a = NA_integer_))

  out <- nest_join(df1, df2, by = "a", na_matches = "never")
  expect <- tibble(a = c(1, NA), df2 = list(tibble(b = 1L), tibble(b = integer())))
  expect_equal(out, expect)

  dat1 <- tibble(
    name = c("a", "c"),
    var1 = c(1, 2)
  )
  dat3 <- tibble(
    name = c("a", NA_character_),
    var3 = c(5, 6)
  )
  expect_equal(
    full_join(dat1, dat3, by = "name", na_matches = "never"),
    tibble(name = c("a", "c", NA), var1 = c(1, 2, NA), var3 = c(5, NA, 6))
  )
})

test_that("`left_join(by = join_by(closest(...)))` works as expected", {
  df1 <- tibble(x = 1:5)
  df2 <- tibble(y = c(1, 2, 4))

  out <- left_join(df1, df2, by = join_by(closest(x <= y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(1, 2, 4, 4, NA))

  out <- left_join(df1, df2, by = join_by(closest(x < y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(2, 4, 4, NA, NA))

  out <- left_join(df1, df2, by = join_by(closest(x >= y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(1, 2, 2, 4, 4))

  out <- left_join(df1, df2, by = join_by(closest(x > y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(NA, 1, 2, 2, 4))
})

test_that("`full_join(by = join_by(closest(...)))` works as expected", {
  df1 <- tibble(x = 1:5)
  df2 <- tibble(y = c(1, 2, 4))

  out <- full_join(df1, df2, by = join_by(closest(x <= y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(1, 2, 4, 4, NA))

  # `y = 1` has no `x < 1` partner, so full join appends an unmatched row
  out <- full_join(df1, df2, by = join_by(closest(x < y)))
  expect_identical(out$x, c(1:5, NA))
  expect_identical(out$y, c(2, 4, 4, NA, NA, 1))

  out <- full_join(df1, df2, by = join_by(closest(x >= y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(1, 2, 2, 4, 4))

  out <- full_join(df1, df2, by = join_by(closest(x > y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(NA, 1, 2, 2, 4))
})

test_that("`right_join(by = join_by(closest(...)))` works as expected", {
  df1 <- tibble(x = 1:5)
  df2 <- tibble(y = c(1, 2, 4))

  out <- right_join(df1, df2, by = join_by(closest(x <= y)))
  expect_identical(out$x, 1:4)
  expect_identical(out$y, c(1, 2, 4, 4))

  out <- right_join(df1, df2, by = join_by(closest(x < y)))
  expect_identical(out$x, c(1:3, NA))
  expect_identical(out$y, c(2, 4, 4, 1))

  out <- right_join(df1, df2, by = join_by(closest(x >= y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(1, 2, 2, 4, 4))

  out <- right_join(df1, df2, by = join_by(closest(x > y)))
  expect_identical(out$x, 2:5)
  expect_identical(out$y, c(1, 2, 2, 4))
})

test_that("`inner_join(by = join_by(closest(...)))` works as expected", {
  df1 <- tibble(x = 1:5)
  df2 <- tibble(y = c(1, 2, 4))

  out <- inner_join(df1, df2, by = join_by(closest(x <= y)))
  expect_identical(out$x, 1:4)
  expect_identical(out$y, c(1, 2, 4, 4))

  out <- inner_join(df1, df2, by = join_by(closest(x < y)))
  expect_identical(out$x, 1:3)
  expect_identical(out$y, c(2, 4, 4))

  out <- inner_join(df1, df2, by = join_by(closest(x >= y)))
  expect_identical(out$x, 1:5)
  expect_identical(out$y, c(1, 2, 2, 4, 4))

  out <- inner_join(df1, df2, by = join_by(closest(x > y)))
  expect_identical(out$x, 2:5)
  expect_identical(out$y, c(1, 2, 2, 4))
})

test_that("joins using `between(bounds =)` work as expected (#6488)", {
  df1 <- tibble(x
    = 1:5)
  df2 <- tibble(lower = 2, upper = 4)

  # Each `bounds` spelling controls inclusivity of the two endpoints
  out <- full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "[]")))
  expect_identical(out$lower, c(NA, 2, 2, 2, NA))
  expect_identical(out$upper, c(NA, 4, 4, 4, NA))

  out <- full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "[)")))
  expect_identical(out$lower, c(NA, 2, 2, NA, NA))
  expect_identical(out$upper, c(NA, 4, 4, NA, NA))

  out <- full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "(]")))
  expect_identical(out$lower, c(NA, NA, 2, 2, NA))
  expect_identical(out$upper, c(NA, NA, 4, 4, NA))

  out <- full_join(df1, df2, by = join_by(between(x, lower, upper, bounds = "()")))
  expect_identical(out$lower, c(NA, NA, 2, NA, NA))
  expect_identical(out$upper, c(NA, NA, 4, NA, NA))
})

test_that("joins using `overlaps(bounds =)` work as expected (#6488)", {
  df1 <- tibble(x_lower = c(1, 1, 3, 4), x_upper = c(2, 3, 4, 5))
  df2 <- tibble(y_lower = 2, y_upper = 4)

  expect_closed <- vec_cbind(df1, vec_c(df2, df2, df2, df2))

  out <- full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "[]")))
  expect_identical(out, expect_closed)

  # `[)`, `(]`, and `()` all generate the same binary conditions but are useful
  # for consistency with `between(bounds =)`
  expect_open <- vec_cbind(df1, vec_c(NA, df2, df2, NA))

  out <- full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "[)")))
  expect_identical(out, expect_open)

  out <- full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "(]")))
  expect_identical(out, expect_open)

  out <- full_join(df1, df2, by = join_by(overlaps(x_lower, x_upper, y_lower, y_upper, bounds = "()")))
  expect_identical(out, expect_open)
})

test_that("join_mutate() validates arguments", {
  # `join_mutate()` is the internal engine behind the mutating joins
  df <- tibble(x = 1)

  # Mutating joins
  expect_snapshot(error = TRUE, {
    join_mutate(df, df, by = 1, type = "left")
    join_mutate(df, df, by = "x", type = "left", suffix = 1)
    join_mutate(df, df, by = "x", type = "left",
      na_matches = "foo")
    join_mutate(df, df, by = "x", type = "left", keep = 1)
  })
})

test_that("join_filter() validates arguments", {
  df <- tibble(x = 1)

  # Filtering joins
  expect_snapshot(error = TRUE, {
    join_filter(df, df, by = 1, type = "semi")
    join_filter(df, df, by = "x", type = "semi", na_matches = "foo")
  })
})

test_that("mutating joins trigger many-to-many warning", {
  df <- tibble(x = c(1, 1))
  expect_snapshot(out <- left_join(df, df, join_by(x)))
})

test_that("mutating joins don't trigger many-to-many warning when called indirectly", {
  df <- tibble(x = c(1, 1))

  fn <- function(df1, df2, relationship = NULL) {
    left_join(df1, df2, join_by(x), relationship = relationship)
  }

  # Directly calling `left_join()` from a function you control results in a warning
  expect_warning(fn(df, df), class = "dplyr_warning_join_relationship_many_to_many")

  # Now mimic calling an "rlang function" which you don't control that calls `left_join()`
  fn_env(fn) <- ns_env("rlang")

  # Indirectly calling `left_join()` through a function you don't control
  # doesn't warn
  expect_no_warning(fn(df, df), class = "dplyr_warning_join_relationship_many_to_many")
})

test_that("mutating joins compute common columns", {
  df1 <- tibble(x = c(1, 2), y = c(2, 3))
  df2 <- tibble(x = c(1, 3), z = c(2, 3))
  # No `by` supplied: snapshot captures the "Joining with" message
  expect_snapshot(out <- left_join(df1, df2))
})

test_that("filtering joins compute common columns", {
  df1 <- tibble(x = c(1, 2), y = c(2, 3))
  df2 <- tibble(x = c(1, 3), z = c(2, 3))
  expect_snapshot(out <- semi_join(df1, df2))
})

test_that("mutating joins finalize unspecified columns (#6804)", {
  df1 <- tibble(x = NA)
  df2 <- tibble(x = NA)

  expect_identical(
    inner_join(df1, df2, by = join_by(x)),
    tibble(x = NA)
  )
  expect_identical(
    inner_join(df1, df2, by = join_by(x), na_matches = "never"),
    tibble(x = logical())
  )

  # Pre-existing `unspecified()` vectors get finalized, because they are
  # considered internal types and we took a "common type" between the keys
  df1 <- tibble(x = unspecified())
  df2 <- tibble(x = unspecified())
  expect_identical(
    inner_join(df1, df2, by = join_by(x)),
    tibble(x = logical())
  )
})

test_that("filtering joins finalize unspecified columns (#6804)", {
  df1 <- tibble(x = NA)
  df2 <- tibble(x = NA)

  expect_identical(
    semi_join(df1, df2, by = join_by(x)),
    tibble(x = NA)
  )
  expect_identical(
    semi_join(df1, df2, by = join_by(x), na_matches = "never"),
    tibble(x = logical())
  )

  # Pre-existing `unspecified()` vectors aren't finalized,
  # because we don't take the common type of the keys.
  # We retain the exact type of `x`.
  df1 <- tibble(x = unspecified())
  df2 <- tibble(x = NA)

  expect_identical(
    semi_join(df1, df2, by = join_by(x)),
    tibble(x = unspecified())
  )
})

test_that("mutating joins reference original column in `y` when there are type errors (#6465)", {
  x <- tibble(a = 1)
  y <- tibble(b = "1")

  expect_snapshot({
    (expect_error(left_join(x, y, by = join_by(a == b))))
  })
})

test_that("filtering joins reference original column in `y` when there are type errors (#6465)", {
  x <- tibble(a = 1)
  y <- tibble(b = "1")

  expect_snapshot({
    (expect_error(semi_join(x, y, by = join_by(a == b))))
  })
})

test_that("error if passed additional arguments", {
  df1 <- data.frame(a = 1:3)
  df2 <- data.frame(a = 1)

  # `on` is not a join argument; each verb must reject it
  expect_snapshot(error = TRUE, {
    inner_join(df1, df2, on = "a")
    left_join(df1, df2, on = "a")
    right_join(df1, df2, on = "a")
    full_join(df1, df2, on = "a")
    nest_join(df1, df2, on = "a")
    anti_join(df1, df2, on = "a")
    semi_join(df1, df2, on = "a")
  })
})

# nest_join ---------------------------------------------------------------

test_that("nest_join returns list of tibbles (#3570)",{
  df1 <- tibble(x = c(1, 2), y = c(2, 3))
  df2 <- tibble(x = c(1, 1), z = c(2, 3))
  out <- nest_join(df1, df2, by = "x")

  expect_named(out, c("x", "y", "df2"))
  expect_type(out$df2, "list")
  expect_s3_class(out$df2[[1]], "tbl_df")
})

test_that("nest_join respects types of y (#6295)",{
  df1 <- tibble(x = c(1, 2), y = c(2, 3))
  df2 <- rowwise(tibble(x = c(1, 1), z = c(2, 3)))
  out <- nest_join(df1, df2, by = "x")
expect_s3_class(out$df2[[1]], "rowwise_df") }) test_that("nest_join preserves data frame attributes on `x` and `y` (#6295)", { df1 <- data.frame(x = c(1, 2), y = c(3, 4)) attr(df1, "foo") <- 1 df2 <- data.frame(x = c(1, 2), z = c(3, 4)) attr(df2, "foo") <- 2 out <- nest_join(df1, df2, by = "x") expect_identical(attr(out, "foo"), 1) expect_identical(attr(out$df2[[1]], "foo"), 2) }) test_that("nest_join computes common columns", { df1 <- tibble(x = c(1, 2), y = c(2, 3)) df2 <- tibble(x = c(1, 3), z = c(2, 3)) expect_snapshot(out <- nest_join(df1, df2)) }) test_that("nest_join finalizes unspecified columns (#6804)", { df1 <- tibble(x = NA) df2 <- tibble(x = NA) expect_identical( nest_join(df1, df2, by = join_by(x)), tibble(x = NA, df2 = list(tibble(.rows = 1L))) ) expect_identical( nest_join(df1, df2, by = join_by(x), keep = TRUE), tibble(x = NA, df2 = list(tibble(x = NA))) ) expect_identical( nest_join(df1, df2, by = join_by(x), na_matches = "never"), tibble(x = NA, df2 = list(tibble())) ) # Pre-existing `unspecified()` vectors get finalized, because they are # considered internal types and we took a "common type" between the keys df1 <- tibble(x = unspecified()) df2 <- tibble(x = unspecified()) expect_identical( nest_join(df1, df2, by = join_by(x)), tibble(x = logical(), df2 = list()) ) }) test_that("nest_join references original column in `y` when there are type errors (#6465)", { x <- tibble(a = 1) y <- tibble(b = "1") expect_snapshot({ (expect_error(nest_join(x, y, by = join_by(a == b)))) }) }) test_that("nest_join handles multiple matches in x (#3642)", { df1 <- tibble(x = c(1, 1)) df2 <- tibble(x = 1, y = 1:2) out <- nest_join(df1, df2, by = "x") expect_equal(out$df2[[1]], out$df2[[2]]) }) test_that("nest_join forces `multiple = 'all'` internally (#6392)", { df1 <- tibble(x = 1) df2 <- tibble(x = 1, y = 1:2) expect_no_warning(out <- nest_join(df1, df2, by = "x")) expect_identical(nrow(out$df2[[1]]), 2L) }) test_that("y keys dropped by default for equi 
conditions", { df1 <- tibble(x = c(1, 2), y = c(2, 3)) df2 <- tibble(x = c(1, 3), z = c(2, 3)) out <- nest_join(df1, df2, by = "x") expect_named(out, c("x", "y", "df2")) expect_named(out$df2[[1]], "z") out <- nest_join(df1, df2, by = "x", keep = TRUE) expect_named(out$df2[[1]], c("x", "z")) }) test_that("y keys kept by default for non-equi conditions", { df1 <- tibble(x = c(1, 2), y = c(2, 3)) df2 <- tibble(x = c(1, 3), z = c(2, 3)) out <- nest_join(df1, df2, by = join_by(x >= x)) expect_named(out, c("x", "y", "df2")) expect_named(out$df2[[1]], c("x", "z")) }) test_that("validates inputs", { df1 <- tibble(x = c(1, 2), y = c(2, 3)) df2 <- tibble(x = c(1, 3), z = c(2, 3)) expect_snapshot(error = TRUE, { nest_join(df1, df2, by = 1) nest_join(df1, df2, keep = 1) nest_join(df1, df2, name = 1) nest_join(df1, df2, na_matches = 1) }) }) # output type --------------------------------------------------------------- test_that("joins x preserve type of x", { df1 <- data.frame(x = 1) df2 <- tibble(x = 2) expect_s3_class(inner_join(df1, df2, by = "x"), "data.frame", exact = TRUE) expect_s3_class(inner_join(df2, df1, by = "x"), "tbl_df") }) test_that("joins preserve groups", { gf1 <- tibble(a = 1:3) %>% group_by(a) gf2 <- tibble(a = rep(1:4, 2), b = 1) %>% group_by(b) i <- count_regroups(out <- inner_join(gf1, gf2, by = "a")) expect_equal(i, 1L) expect_equal(group_vars(out), "a") i <- count_regroups(out <- semi_join(gf1, gf2, by = "a")) expect_equal(i, 0L) expect_equal(group_vars(out), "a") # once for x + once for each row for y i <- count_regroups(out <- nest_join(gf1, gf2, by = "a")) expect_equal(i, 4L) expect_equal(group_vars(out), "a") expect_equal(group_vars(out$gf2[[1]]), "b") }) test_that("joins respect zero length groups", { df1 <- tibble(f = factor( c(1,1,2,2), levels = 1:3), x = c(1,2,1,4)) %>% group_by(f) df2 <- tibble(f = factor( c(2,2,3,3), levels = 1:3), y = c(1,2,3,4)) %>% group_by(f) expect_equal(group_size(left_join( df1, df2, by = "f", relationship = 
"many-to-many")), c(2,4)) expect_equal(group_size(right_join( df1, df2, by = "f", relationship = "many-to-many")), c(4,2)) expect_equal(group_size(full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2)) expect_equal(group_size(anti_join( df1, df2, by = "f")), c(2)) expect_equal(group_size(inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(4)) df1 <- tibble(f = factor( c(1,1,2,2), levels = 1:3), x = c(1,2,1,4)) %>% group_by(f, .drop = FALSE) df2 <- tibble(f = factor( c(2,2,3,3), levels = 1:3), y = c(1,2,3,4)) %>% group_by(f, .drop = FALSE) expect_equal(group_size(left_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,0)) expect_equal(group_size(right_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,2)) expect_equal(group_size(full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2)) expect_equal(group_size(anti_join( df1, df2, by = "f")), c(2,0,0)) expect_equal(group_size(inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,0)) }) test_that("group column names reflect renamed duplicate columns (#2330)", { df1 <- tibble(x = 1:5, y = 1:5) %>% group_by(x, y) df2 <- tibble(x = 1:5, y = 1:5) out <- inner_join(df1, df2, by = "x") expect_equal(group_vars(out), "x") # TODO: fix this issue: https://github.com/tidyverse/dplyr/issues/4917 # expect_equal(group_vars(out), c("x", "y.x")) }) test_that("rowwise group structure is updated after a join (#5227)", { df1 <- rowwise(tibble(x = 1:2)) df2 <- tibble(x = c(1:2, 2L)) x <- left_join(df1, df2, by = "x") expect_identical(group_rows(x), list_of(1L, 2L, 3L)) }) # deprecated ---------------------------------------------------------------- test_that("by = character() generates cross (#4206)", { local_options(lifecycle_verbosity = "quiet") df1 <- tibble(x = 1:2) df2 <- tibble(y = 1:2) out <- left_join(df1, df2, by = character()) expect_equal(out$x, rep(1:2, each = 2)) expect_equal(out$y, rep(1:2, 2)) }) test_that("`by = character()` 
technically respects `unmatched`", { local_options(lifecycle_verbosity = "quiet") df1 <- tibble() df2 <- tibble(x = 1) expect_snapshot(error = TRUE, { left_join(df1, df2, by = character(), unmatched = "error") }) }) test_that("`by = character()` technically respects `relationship`", { local_options(lifecycle_verbosity = "quiet") df <- tibble(x = 1:2) expect_snapshot(error = TRUE, { left_join(df, df, by = character(), relationship = "many-to-one") }) }) test_that("`by = character()` for a cross join is deprecated (#6604)", { df1 <- tibble(x = 1:2) df2 <- tibble(y = 1:2) # Mutating join expect_snapshot({ out <- left_join(df1, df2, by = character()) }) # Filtering join expect_snapshot({ out <- semi_join(df1, df2, by = character()) }) # Nest join expect_snapshot({ out <- nest_join(df1, df2, by = character()) }) }) test_that("`by = named character()` for a cross join works", { # Used by the sift package df1 <- tibble(x = 1:2) df2 <- tibble(y = 1:2) by <- set_names(character(), nm = character()) expect_snapshot({ out <- left_join(df1, df2, by = by) }) expect_identical( out, cross_join(df1, df2) ) }) test_that("`by = list(x = character(), y = character())` for a cross join is deprecated (#6604)", { df1 <- tibble(x = 1:2) df2 <- tibble(y = 1:2) expect_snapshot({ out <- left_join(df1, df2, by = list(x = character(), y = character())) }) }) dplyr/tests/testthat/test-desc.R0000644000176200001440000000013414366556340016436 0ustar liggesuserstest_that("errors cleanly on non-vectors", { expect_snapshot(desc(mean), error = TRUE) }) dplyr/tests/testthat/_snaps/0000755000176200001440000000000014525503021015666 5ustar liggesusersdplyr/tests/testthat/_snaps/by.md0000644000176200001440000000126114416000507016622 0ustar liggesusers# throws tidyselect errors Code compute_by(by = y, data = df) Condition Error: ! Can't subset columns that don't exist. x Column `y` doesn't exist. # can't set `.by` with a grouped-df Code compute_by(x, gdf) Condition Error: ! 
Can't supply `by` when `data` is a grouped data frame. # can't set `.by` with a rowwise-df Code compute_by(x, rdf) Condition Error: ! Can't supply `by` when `data` is a rowwise data frame. # can tweak the error args Code compute_by(x, gdf, by_arg = "x", data_arg = "dat") Condition Error: ! Can't supply `x` when `dat` is a grouped data frame. dplyr/tests/testthat/_snaps/summarise.md0000644000176200001440000002110214416000544020212 0ustar liggesusers# can't overwrite column active bindings (#6666) Code summarise(df, y = { x <<- x + 2L mean(x) }) Condition Error in `summarise()`: i In argument: `y = { ... }`. Caused by error: ! unused argument (base::quote(3:6)) --- Code summarise(df, .by = g, y = { x <<- x + 2L mean(x) }) Condition Error in `summarise()`: i In argument: `y = { ... }`. i In group 1: `g = 1`. Caused by error: ! unused argument (base::quote(3:4)) --- Code summarise(gdf, y = { x <<- x + 2L mean(x) }) Condition Error in `summarise()`: i In argument: `y = { ... }`. i In group 1: `g = 1`. Caused by error: ! unused argument (base::quote(3:4)) # can't use `.by` with `.groups` Code summarise(df, .by = x, .groups = "drop") Condition Error in `summarise()`: ! Can't supply both `.by` and `.groups`. # catches `.by` with grouped-df Code summarise(gdf, .by = x) Condition Error in `summarise()`: ! Can't supply `.by` when `.data` is a grouped data frame. # catches `.by` with rowwise-df Code summarise(rdf, .by = x) Condition Error in `summarise()`: ! Can't supply `.by` when `.data` is a rowwise data frame. # `summarise()` doesn't allow data frames with missing or empty names (#6758) Code summarise(df1) Condition Error in `summarise()`: ! Can't transform a data frame with `NA` or `""` names. --- Code summarise(df2) Condition Error in `summarise()`: ! Can't transform a data frame with `NA` or `""` names. # summarise() gives meaningful errors Code tibble(x = 1, y = 2) %>% group_by(x, y) %>% summarise() Message `summarise()` has grouped output by 'x'. 
You can override using the `.groups` argument. Output # A tibble: 1 x 2 # Groups: x [1] x y 1 1 2 Code tibble(x = 1, y = 2) %>% rowwise(x, y) %>% summarise() Message `summarise()` has grouped output by 'x', 'y'. You can override using the `.groups` argument. Output # A tibble: 1 x 2 # Groups: x, y [1] x y 1 1 2 Code tibble(x = 1, y = 2) %>% rowwise() %>% summarise() Output # A tibble: 1 x 0 --- Code (expect_error(tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% summarise(a = rlang::env( a = 1)))) Output Error in `summarise()`: i In argument: `a = rlang::env(a = 1)`. Caused by error: ! `a` must be a vector, not an environment. Code (expect_error(tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% group_by(x, y) %>% summarise(a = rlang::env(a = 1)))) Output Error in `summarise()`: i In argument: `a = rlang::env(a = 1)`. i In group 1: `x = 1`, `y = 1`. Caused by error: ! `a` must be a vector, not an environment. Code (expect_error(tibble(x = 1, y = c(1, 2, 2), z = runif(3)) %>% rowwise() %>% summarise(a = lm(y ~ x)))) Output Error in `summarise()`: i In argument: `a = lm(y ~ x)`. i In row 1. Caused by error: ! `a` must be a vector, not a object. i Did you mean: `a = list(lm(y ~ x))` ? Code (expect_error(tibble(id = 1:2, a = list(1, "2")) %>% group_by(id) %>% summarise( a = a[[1]]))) Output Error in `summarise()`: i In argument: `a = a[[1]]`. Caused by error: ! `a` must return compatible vectors across groups. i Result of type for group 1: `id = 1`. i Result of type for group 2: `id = 2`. Code (expect_error(tibble(id = 1:2, a = list(1, "2")) %>% rowwise() %>% summarise(a = a[[ 1]]))) Output Error in `summarise()`: i In argument: `a = a[[1]]`. Caused by error: ! `a` must return compatible vectors across groups. Code (expect_error(tibble(z = 1) %>% summarise(x = 1:3, y = 1:2))) Output Error in `summarise()`: ! Can't recycle `y = 1:2`. Caused by error: ! `y` must be size 3 or 1, not 2. i An earlier column had size 3. 
Code (expect_error(tibble(z = 1:2) %>% group_by(z) %>% summarise(x = 1:3, y = 1:2))) Output Error in `summarise()`: ! Can't recycle `y = 1:2`. i In group 1: `z = 1`. Caused by error: ! `y` must be size 3 or 1, not 2. i An earlier column had size 3. Code (expect_error(tibble(z = c(1, 3)) %>% group_by(z) %>% summarise(x = seq_len(z), y = 1:2))) Output Error in `summarise()`: ! Can't recycle `y = 1:2`. i In group 2: `z = 3`. Caused by error: ! `y` must be size 3 or 1, not 2. i An earlier column had size 3. Code (expect_error(data.frame(x = 1:2, g = 1:2) %>% group_by(g) %>% summarise(x = if ( g == 1) 42))) Output Error in `summarise()`: i In argument: `x = if (g == 1) 42`. i In group 2: `g = 2`. Caused by error: ! `x` must return compatible vectors across groups. x Can't combine NULL and non NULL results. Code (expect_error(data.frame(x = 1:2, g = 1:2) %>% group_by(g) %>% summarise(x = if ( g == 2) 42))) Output Error in `summarise()`: i In argument: `x = if (g == 2) 42`. i In group 1: `g = 1`. Caused by error: ! `x` must return compatible vectors across groups. x Can't combine NULL and non NULL results. Code (expect_error(summarise(tibble(a = 1), c = .data$b))) Output Error in `summarise()`: i In argument: `c = .data$b`. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code (expect_error(summarise(group_by(tibble(a = 1:3), a), c = .data$b))) Output Error in `summarise()`: i In argument: `c = .data$b`. i In group 1: `a = 1`. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code (expect_error(tibble(x = 1, x = 1, .name_repair = "minimal") %>% summarise(x))) Output Error in `summarise()`: ! Can't transform a data frame with duplicate names. Code (expect_error(tibble() %>% summarise(stop("{")))) Output Error in `summarise()`: i In argument: `stop("{")`. Caused by error: ! { Code (expect_error(tibble(a = 1, b = "{value:1, unit:a}") %>% group_by(b) %>% summarise(a = stop("!")))) Output Error in `summarise()`: i In argument: `a = stop("!")`. 
i In group 1: `b = "{value:1, unit:a}"`. Caused by error: ! ! # non-summary results are deprecated in favor of `reframe()` (#6382) Code out <- summarise(df, x = which(x < 3)) Condition Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0. i Please use `reframe()` instead. i When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly. --- Code out <- summarise(df, x = which(x < 3), .by = g) Condition Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0. i Please use `reframe()` instead. i When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly. --- Code out <- summarise(gdf, x = which(x < 3)) Condition Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0. i Please use `reframe()` instead. i When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly. --- Code out <- summarise(rdf, x = which(x < 3)) Condition Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0. i Please use `reframe()` instead. i When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly. dplyr/tests/testthat/_snaps/sample.md0000644000176200001440000000506014416000537017475 0ustar liggesusers# sample_*() gives meaningful error messages Code df2 <- tibble(x = rep(1:2, 100), y = rep(c(0, 1), 100), g = rep(1:2, each = 100)) grp <- df2 %>% group_by(g) (expect_error(sample_n(grp, nrow(df2) / 2, weight = y))) Output Error in `sample_n()`: ! Can't compute indices. i In group 1: `g = 1`. Caused by error in `sample.int()`: ! 
too few positive probabilities Code (expect_error(sample_frac(grp, 1, weight = y))) Output Error in `sample_frac()`: ! Can't compute indices. i In group 1: `g = 1`. Caused by error in `sample.int()`: ! too few positive probabilities Code (expect_error(mtcars %>% group_by(cyl) %>% sample_n(10))) Output Error in `sample_n()`: ! Can't compute indices. i In group 2: `cyl = 6`. Caused by error: ! `size` must be less than or equal to 7 (size of data). i set `replace = TRUE` to use sampling with replacement. Code (expect_error(sample_n(list()))) Output Error in `sample_n()`: ! `tbl` must be a data frame, not an empty list. Code (expect_error(sample_frac(list()))) Output Error in `sample_frac()`: ! `tbl` must be a data frame, not an empty list. Code # # respects weight df <- data.frame(x = 1:2, y = c(0, 1)) (expect_error(sample_n(df, 2, weight = y))) Output Error in `sample_n()`: ! Can't compute indices. Caused by error in `sample.int()`: ! too few positive probabilities Code (expect_error(sample_frac(df, 2))) Output Error in `sample_frac()`: ! Can't compute indices. Caused by error: ! `size` of sampled fraction must be less or equal to one. i set `replace = TRUE` to use sampling with replacement. Code (expect_error(sample_frac(df %>% group_by(y), 2))) Output Error in `sample_frac()`: ! Can't compute indices. i In group 1: `y = 0`. Caused by error: ! `size` of sampled fraction must be less or equal to one. i set `replace = TRUE` to use sampling with replacement. Code (expect_error(sample_frac(df, 1, weight = y))) Output Error in `sample_frac()`: ! Can't compute indices. Caused by error in `sample.int()`: ! too few positive probabilities dplyr/tests/testthat/_snaps/mutate.md0000644000176200001440000002404514420040343017511 0ustar liggesusers# mutate() supports constants (#6056, #6305) Code (expect_error(df %>% mutate(z = !!z))) Output Error in `mutate()`: i In argument: `z = `. Caused by error: ! Inlined constant `z` must be size 10 or 1, not 5. 
Code (expect_error(df %>% group_by(g) %>% mutate(z = !!z))) Output Error in `mutate()`: i In argument: `z = `. Caused by error: ! Inlined constant `z` must be size 10 or 1, not 5. Code (expect_error(df %>% rowwise() %>% mutate(z = !!z))) Output Error in `mutate()`: i In argument: `z = `. Caused by error: ! Inlined constant `z` must be size 10 or 1, not 5. --- Code (expect_error(df %>% group_by(g) %>% mutate(y = .env$y))) Output Error in `mutate()`: i In argument: `y = .env$y`. i In group 1: `g = 1`. Caused by error: ! `y` must be size 5 or 1, not 10. Code (expect_error(df %>% rowwise() %>% mutate(y = .env$y))) Output Error in `mutate()`: i In argument: `y = .env$y`. i In row 1. Caused by error: ! `y` must be size 1, not 10. i Did you mean: `y = list(.env$y)` ? # can't overwrite column active bindings (#6666) Code mutate(df, y = { x <<- 2 x }) Condition Error in `mutate()`: i In argument: `y = { ... }`. Caused by error: ! unused argument (base::quote(2)) --- Code mutate(df, .by = g, y = { x <<- 2 x }) Condition Error in `mutate()`: i In argument: `y = { ... }`. i In group 1: `g = 1`. Caused by error: ! unused argument (base::quote(2)) --- Code mutate(gdf, y = { x <<- 2 x }) Condition Error in `mutate()`: i In argument: `y = { ... }`. i In group 1: `g = 1`. Caused by error: ! unused argument (base::quote(2)) # can't share local variables across expressions (#6666) Code mutate(df, x2 = { foo <- x x }, y2 = { foo }) Condition Error in `mutate()`: i In argument: `y2 = { ... }`. Caused by error: ! object 'foo' not found # rowwise mutate un-lists existing size-1 list-columns (#6302) Code mutate(df, y = x) Condition Error in `mutate()`: i In argument: `y = x`. i In row 2. Caused by error: ! `y` must be size 1, not 2. i Did you mean: `y = list(x)` ? # catches `.by` with grouped-df Code mutate(gdf, .by = x) Condition Error in `mutate()`: ! Can't supply `.by` when `.data` is a grouped data frame. 
# catches `.by` with rowwise-df Code mutate(rdf, .by = x) Condition Error in `mutate()`: ! Can't supply `.by` when `.data` is a rowwise data frame. # mutate() deals with 0 groups (#5534) Code mutate(df, y = max(x)) Condition Warning: There was 1 warning in `mutate()`. i In argument: `y = max(x)`. Caused by warning in `max()`: ! no non-missing arguments to max; returning -Inf Output # A tibble: 0 x 2 # Groups: x [0] # i 2 variables: x , y # mutate() give meaningful errors Code tbl <- tibble(x = 1:2, y = 1:2) (expect_error(tbl %>% mutate(y = NULL, a = sum(y)))) Output Error in `mutate()`: i In argument: `a = sum(y)`. Caused by error: ! object 'y' not found Code (expect_error(tbl %>% group_by(x) %>% mutate(y = NULL, a = sum(y)))) Output Error in `mutate()`: i In argument: `a = sum(y)`. i In group 1: `x = 1`. Caused by error: ! object 'y' not found Code (expect_error(tibble(x = 1) %>% mutate(y = mean))) Output Error in `mutate()`: i In argument: `y = mean`. Caused by error: ! `y` must be a vector, not a function. Code df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) (expect_error(df %>% mutate(out = env(a = 1)))) Output Error in `mutate()`: i In argument: `out = env(a = 1)`. Caused by error: ! `out` must be a vector, not an environment. Code (expect_error(df %>% group_by(g) %>% mutate(out = env(a = 1)))) Output Error in `mutate()`: i In argument: `out = env(a = 1)`. i In group 1: `g = 1`. Caused by error: ! `out` must be a vector, not an environment. Code (expect_error(df %>% rowwise() %>% mutate(out = rnorm))) Output Error in `mutate()`: i In argument: `out = rnorm`. i In row 1. Caused by error: ! `out` must be a vector, not a function. i Did you mean: `out = list(rnorm)` ? Code (expect_error(data.frame(x = rep(1:5, each = 3)) %>% group_by(x) %>% mutate( val = ifelse(x < 3, "foo", 2)))) Output Error in `mutate()`: i In argument: `val = ifelse(x < 3, "foo", 2)`. Caused by error: ! `val` must return compatible vectors across groups. i Result of type for group 1: `x = 1`. 
i Result of type for group 3: `x = 3`. Code (expect_error(tibble(a = 1:3, b = 4:6) %>% group_by(a) %>% mutate(if (a == 1) NULL else "foo"))) Output Error in `mutate()`: i In argument: `if (a == 1) NULL else "foo"`. i In group 1: `a = 1`. Caused by error: ! `if (a == 1) NULL else "foo"` must return compatible vectors across groups. x Can't combine NULL and non NULL results. Code (expect_error(tibble(a = 1:3, b = 4:6) %>% group_by(a) %>% mutate(if (a == 2) NULL else "foo"))) Output Error in `mutate()`: i In argument: `if (a == 2) NULL else "foo"`. i In group 2: `a = 2`. Caused by error: ! `if (a == 2) NULL else "foo"` must return compatible vectors across groups. x Can't combine NULL and non NULL results. Code (expect_error(data.frame(x = c(2, 2, 3, 3)) %>% mutate(int = 1:5))) Output Error in `mutate()`: i In argument: `int = 1:5`. Caused by error: ! `int` must be size 4 or 1, not 5. Code (expect_error(data.frame(x = c(2, 2, 3, 3)) %>% group_by(x) %>% mutate(int = 1: 5))) Output Error in `mutate()`: i In argument: `int = 1:5`. i In group 1: `x = 2`. Caused by error: ! `int` must be size 2 or 1, not 5. Code (expect_error(data.frame(x = c(2, 3, 3)) %>% group_by(x) %>% mutate(int = 1:5))) Output Error in `mutate()`: i In argument: `int = 1:5`. i In group 1: `x = 2`. Caused by error: ! `int` must be size 1, not 5. Code (expect_error(data.frame(x = c(2, 2, 3, 3)) %>% rowwise() %>% mutate(int = 1:5)) ) Output Error in `mutate()`: i In argument: `int = 1:5`. i In row 1. Caused by error: ! `int` must be size 1, not 5. i Did you mean: `int = list(1:5)` ? Code (expect_error(tibble(y = list(1:3, "a")) %>% rowwise() %>% mutate(y2 = y))) Output Error in `mutate()`: i In argument: `y2 = y`. i In row 1. Caused by error: ! `y2` must be size 1, not 3. i Did you mean: `y2 = list(y)` ? Code (expect_error(data.frame(x = 1:10) %>% mutate(y = 11:20, y = 1:2))) Output Error in `mutate()`: i In argument: `y = 1:2`. Caused by error: ! `y` must be size 10 or 1, not 2. 
Code (expect_error(tibble(a = 1) %>% mutate(c = .data$b))) Output Error in `mutate()`: i In argument: `c = .data$b`. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code (expect_error(tibble(a = 1:3) %>% group_by(a) %>% mutate(c = .data$b))) Output Error in `mutate()`: i In argument: `c = .data$b`. i In group 1: `a = 1`. Caused by error in `.data$b`: ! Column `b` not found in `.data`. Code lazy <- (function(x) list(enquo(x))) res <- tbl %>% rowwise() %>% mutate(z = lazy(x), .keep = "unused") (expect_error(eval_tidy(res$z[[1]]))) Output Error: ! Obsolete data mask. x Too late to resolve `x` after the end of `dplyr::mutate()`. i Did you save an object that uses `x` lazily in a column in the `dplyr::mutate()` expression ? Code (expect_error(tibble() %>% mutate(stop("{")))) Output Error in `mutate()`: i In argument: `stop("{")`. Caused by error: ! { # mutate() errors refer to expressions if not named Code (expect_error(mutate(mtcars, 1:3))) Output Error in `mutate()`: i In argument: `1:3`. Caused by error: ! `1:3` must be size 32 or 1, not 3. Code (expect_error(mutate(group_by(mtcars, cyl), 1:3))) Output Error in `mutate()`: i In argument: `1:3`. i In group 1: `cyl = 4`. Caused by error: ! `1:3` must be size 11 or 1, not 3. # `mutate()` doesn't allow data frames with missing or empty names (#6758) Code mutate(df1) Condition Error in `mutate()`: ! Can't transform a data frame with `NA` or `""` names. --- Code mutate(df2) Condition Error in `mutate()`: ! Can't transform a data frame with `NA` or `""` names. dplyr/tests/testthat/_snaps/join-by.md0000644000176200001440000002351714525503021017567 0ustar liggesusers# joining by nothing is an error Code join_by() Condition Error in `join_by()`: ! Must supply at least one expression. i If you want a cross join, use `cross_join()`. # nicely catches required missing arguments when wrapped Code fn(a) Condition Error: ! Expressions using `==` can't contain missing arguments. x Argument `y` is missing. 
# allows for namespaced helpers (#6838) Code join_by(dplyr::between(x, left, right)) Output Join By: - dplyr::between(x, left, right) --- Code join_by(dplyr::within(xl, xu, yl, yu)) Output Join By: - dplyr::within(xl, xu, yl, yu) --- Code join_by(dplyr::overlaps(xl, xu, yl, yu)) Output Join By: - dplyr::overlaps(xl, xu, yl, yu) --- Code join_by(dplyr::closest(x < y)) Output Join By: - dplyr::closest(x < y) # has an informative print method Code join_by(a, b) Output Join By: - a - b --- Code join_by("a", "b") Output Join By: - "a" - "b" --- Code join_by(a == a, b >= c) Output Join By: - a == a - b >= c --- Code join_by(a == a, b >= "c") Output Join By: - a == a - b >= "c" --- Code join_by(a == a, closest(b >= c), closest(d < e)) Output Join By: - a == a - closest(b >= c) - closest(d < e) # has informative error messages Code join_by(a = b) Condition Error in `join_by()`: ! Can't name join expressions. i Did you use `=` instead of `==`? --- Code join_by(NULL) Condition Error in `join_by()`: ! Expressions can't be empty. x Expression 1 is empty. --- Code join_by(foo(x > y)) Condition Error in `join_by()`: ! Expressions must use one of: `==`, `>=`, `>`, `<=`, `<`, `closest()`, `between()`, `overlaps()`, or `within()`. i Expression 1 is `foo(x > y)`. --- Code join_by(x == y, x^y) Condition Error in `join_by()`: ! Expressions must use one of: `==`, `>=`, `>`, `<=`, `<`, `closest()`, `between()`, `overlaps()`, or `within()`. i Expression 2 is `x^y`. --- Code join_by(x + 1 == y) Condition Error in `join_by()`: ! Expressions can't contain computed columns, and can only reference columns by name or by explicitly specifying a side, like `x$col` or `y$col`. i Expression 1 contains `x + 1`. --- Code join_by(x == y + 1) Condition Error in `join_by()`: ! Expressions can't contain computed columns, and can only reference columns by name or by explicitly specifying a side, like `x$col` or `y$col`. i Expression 1 contains `y + 1`. 
--- Code join_by(1) Condition Error in `join_by()`: ! Each element of `...` must be a single column name or a join by expression. x Element 1 is not a name and not an expression. --- Code join_by(1()) Condition Error in `join_by()`: ! Expressions must use one of: `==`, `>=`, `>`, `<=`, `<`, `closest()`, `between()`, `overlaps()`, or `within()`. i Expression 1 is `1()`. --- Code join_by(dplyrr::between(x, left, right)) Condition Error in `join_by()`: ! Expressions can only be namespace prefixed with `dplyr::`. i Expression 1 is `dplyrr::between(x, left, right)`. --- Code join_by(x$a) Condition Error in `join_by()`: ! Can't use `$` when specifying a single column name. i Expression 1 is `x$a`. --- Code join_by(z$a == y$b) Condition Error in `join_by()`: ! The left-hand side of a `$` expression must be either `x$` or `y$`. i Expression 1 contains `z$a`. --- Code join_by(x$a == z$b) Condition Error in `join_by()`: ! The left-hand side of a `$` expression must be either `x$` or `y$`. i Expression 1 contains `z$b`. --- Code join_by((x + 1)$y == b) Condition Error in `join_by()`: ! The left-hand side of a `$` expression must be a symbol or string. i Expression 1 contains `(x + 1)$y`. --- Code join_by(x$a == x$b) Condition Error in `join_by()`: ! The left and right-hand sides of a binary expression must reference different tables. i Expression 1 contains `x$a == x$b`. --- Code join_by(y$a == b) Condition Error in `join_by()`: ! The left and right-hand sides of a binary expression must reference different tables. i Expression 1 contains `y$a == b`. --- Code join_by(between(x$a, x$a, x$b)) Condition Error in `join_by()`: ! Expressions containing `between()` can't all reference the same table. i Expression 1 is `between(x$a, x$a, x$b)`. --- Code join_by(within(x$a, x$b, x$a, x$b)) Condition Error in `join_by()`: ! Expressions containing `within()` can't all reference the same table. i Expression 1 is `within(x$a, x$b, x$a, x$b)`. 
--- Code join_by(overlaps(a, b, x$a, x$b)) Condition Error in `join_by()`: ! Expressions containing `overlaps()` can't all reference the same table. i Expression 1 is `overlaps(a, b, x$a, x$b)`. --- Code join_by(closest(x$a >= x$b)) Condition Error in `join_by()`: ! The left and right-hand sides of a binary expression must reference different tables. i Expression 1 contains `x$a >= x$b`. --- Code join_by(between(a, x$a, y$b)) Condition Error in `join_by()`: ! Expressions containing `between()` must reference the same table for the lower and upper bounds. i Expression 1 is `between(a, x$a, y$b)`. --- Code join_by(within(x$a, y$b, y$a, y$b)) Condition Error in `join_by()`: ! Expressions containing `within()` must reference the same table for the left-hand side lower and upper bounds. i Expression 1 is `within(x$a, y$b, y$a, y$b)`. --- Code join_by(overlaps(x$a, x$b, y$a, x$b)) Condition Error in `join_by()`: ! Expressions containing `overlaps()` must reference the same table for the right-hand side lower and upper bounds. i Expression 1 is `overlaps(x$a, x$b, y$a, x$b)`. --- Code join_by(`>`(x)) Condition Error: ! Expressions using `>` can't contain missing arguments. x Argument `y` is missing. --- Code join_by(between(x)) Condition Error: ! Expressions using `between()` can't contain missing arguments. x Argument `y_lower` is missing. --- Code join_by(within(x)) Condition Error: ! Expressions using `within()` can't contain missing arguments. x Argument `x_upper` is missing. --- Code join_by(overlaps(x)) Condition Error: ! Expressions using `overlaps()` can't contain missing arguments. x Argument `x_upper` is missing. --- Code join_by(closest()) Condition Error: ! Expressions using `closest()` can't contain missing arguments. x Argument `expr` is missing. --- Code join_by(`$`(x) > y) Condition Error: ! Expressions using `$` can't contain missing arguments. x Argument `name` is missing. --- Code join_by(closest(a >= b, 1)) Condition Error in `closest()`: ! 
unused argument (1) --- Code join_by(closest(a == b)) Condition Error in `join_by()`: ! The expression used in `closest()` can't use `==`. i Expression 1 is `closest(a == b)`. --- Code join_by(closest(x)) Condition Error in `join_by()`: ! The first argument of `closest()` must be an expression. i Expression 1 is `closest(x)`. --- Code join_by(closest(1)) Condition Error in `join_by()`: ! The first argument of `closest()` must be an expression. i Expression 1 is `closest(1)`. --- Code join_by(closest(x + y)) Condition Error in `join_by()`: ! The expression used in `closest()` must use one of: `>=`, `>`, `<=`, or `<`. i Expression 1 is `closest(x + y)`. --- Code join_by(between(x, lower, upper, bounds = 1)) Condition Error: ! `bounds` must be a string or character vector. --- Code join_by(between(x, lower, upper, bounds = "a")) Condition Error: ! `bounds` must be one of "[]", "[)", "(]", or "()", not "a". --- Code join_by(overlaps(x, y, lower, upper, bounds = 1)) Condition Error: ! `bounds` must be a string or character vector. --- Code join_by(overlaps(x, y, lower, upper, bounds = "a")) Condition Error: ! `bounds` must be one of "[]", "[)", "(]", or "()", not "a". --- Code join_by(between(x, lower, upper, foo = 1)) Condition Error: ! `...` must be empty. i Non-empty dots were detected inside `between()`. --- Code join_by(overlaps(x, y, lower, upper, foo = 1)) Condition Error: ! `...` must be empty. i Non-empty dots were detected inside `overlaps()`. # as_join_by() emits useful errors Code as_join_by(FALSE) Condition Error: ! `by` must be a (named) character vector, list, `join_by()` result, or NULL, not `FALSE`. # join_by_common() emits useful information Code by <- join_by_common(c("x", "y"), c("x", "y")) Message Joining with `by = join_by(x, y)` --- Code by <- join_by_common(c("_x", "foo bar"), c("_x", "foo bar")) Message Joining with `by = join_by(`_x`, `foo bar`)` --- Code join_by_common(c("x", "y"), c("w", "z")) Condition Error: ! 
`by` must be supplied when `x` and `y` have no common variables. i Use `cross_join()` to perform a cross-join. dplyr/tests/testthat/_snaps/defunct.md0000644000176200001440000000405214416000515017640 0ustar liggesusers# generate informative errors Code id() Condition Error: ! `id()` was deprecated in dplyr 0.5.0 and is now defunct. i Please use `vctrs::vec_group_id()` instead. Code failwith() Condition Error: ! `failwith()` was deprecated in dplyr 0.7.0 and is now defunct. i Please use `purrr::possibly()` instead. Code select_vars() Condition Error: ! `select_vars()` was deprecated in dplyr 0.8.4 and is now defunct. i Please use `tidyselect::vars_select()` instead. Code rename_vars() Condition Error: ! `rename_vars()` was deprecated in dplyr 0.8.4 and is now defunct. i Please use `tidyselect::vars_rename()` instead. Code select_var() Condition Error: ! `select_var()` was deprecated in dplyr 0.8.4 and is now defunct. i Please use `tidyselect::vars_pull()` instead. Code current_vars() Condition Error: ! `current_vars()` was deprecated in dplyr 0.8.4 and is now defunct. i Please use `tidyselect::peek_vars()` instead. Code bench_tbls() Condition Error: ! `bench_tbls()` was deprecated in dplyr 1.0.0 and is now defunct. Code compare_tbls() Condition Error: ! `compare_tbls()` was deprecated in dplyr 1.0.0 and is now defunct. Code compare_tbls2() Condition Error: ! `compare_tbls2()` was deprecated in dplyr 1.0.0 and is now defunct. Code eval_tbls() Condition Error: ! `eval_tbls()` was deprecated in dplyr 1.0.0 and is now defunct. Code eval_tbls2() Condition Error: ! `eval_tbls2()` was deprecated in dplyr 1.0.0 and is now defunct. Code location() Condition Error: ! `location()` was deprecated in dplyr 1.0.0 and is now defunct. i Please use `lobstr::ref()` instead. Code changes() Condition Error: ! `changes()` was deprecated in dplyr 1.0.0 and is now defunct. i Please use `lobstr::ref()` instead. 
dplyr/tests/testthat/_snaps/vec-case-when.md0000644000176200001440000001661014416000545020643 0ustar liggesusers# `conditions` inputs can be size zero Code vec_case_when(list(logical()), list(1:2)) Condition Error in `vec_case_when()`: ! `values[[1]]` must have size 0, not size 2. # `values` are cast to their common type Code vec_case_when(list(FALSE, TRUE), list(1, "x")) Condition Error in `vec_case_when()`: ! Can't combine `values[[1]]` and `values[[2]]` . # `values` must be size 1 or same size as the `conditions` Code vec_case_when(list(c(TRUE, FALSE, TRUE, TRUE)), list(1:3)) Condition Error in `vec_case_when()`: ! `values[[1]]` must have size 4, not size 3. # `default` must be size 1 or same size as `conditions` (exact same as any other `values` input) Code vec_case_when(list(FALSE), list(1L), default = 2:3) Condition Error in `vec_case_when()`: ! `default` must have size 1, not size 2. # `default_arg` can be customized Code vec_case_when(list(FALSE), list(1L), default = 2:3, default_arg = "foo") Condition Error in `vec_case_when()`: ! `foo` must have size 1, not size 2. --- Code vec_case_when(list(FALSE), list(1L), default = "x", default_arg = "foo") Condition Error in `vec_case_when()`: ! Can't combine `values[[1]]` and `foo` . # `conditions_arg` is validated Code vec_case_when(list(TRUE), list(1), conditions_arg = 1) Condition Error in `vec_case_when()`: ! `conditions_arg` must be a string. # `values_arg` is validated Code vec_case_when(list(TRUE), list(1), values_arg = 1) Condition Error in `vec_case_when()`: ! `values_arg` must be a string. # `default_arg` is validated Code vec_case_when(list(TRUE), list(1), default_arg = 1) Condition Error in `vec_case_when()`: ! `default_arg` must be a string. # `conditions` must all be the same size Code vec_case_when(list(c(TRUE, FALSE), TRUE), list(1, 2)) Condition Error in `vec_case_when()`: ! `conditions[[2]]` must have size 2, not size 1. 
--- Code vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2)) Condition Error in `vec_case_when()`: ! Can't recycle `conditions[[1]]` (size 2) to match `conditions[[2]]` (size 3). # `conditions` must be logical (and aren't cast to logical!) Code vec_case_when(list(1), list(2)) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must be a logical vector, not the number 1. --- Code vec_case_when(list(TRUE, 3.5), list(2, 4)) Condition Error in `vec_case_when()`: ! `conditions[[2]]` must be a logical vector, not the number 3.5. # `size` overrides the `conditions` sizes Code vec_case_when(list(TRUE), list(1), size = 5) Condition Error in `vec_case_when()`: ! `conditions[[1]]` must have size 5, not size 1. --- Code vec_case_when(list(c(TRUE, FALSE), c(TRUE, FALSE, TRUE)), list(1, 2), size = 2) Condition Error in `vec_case_when()`: ! `conditions[[2]]` must have size 2, not size 3. # `ptype` overrides the `values` types Code vec_case_when(list(FALSE, TRUE), list(1, 2), ptype = character()) Condition Error in `vec_case_when()`: ! Can't convert `values[[1]]` to . # number of `conditions` and `values` must be the same Code vec_case_when(list(TRUE), list()) Condition Error in `vec_case_when()`: ! The number of supplied conditions (1) must equal the number of supplied values (0). --- Code vec_case_when(list(TRUE, TRUE), list(1)) Condition Error in `vec_case_when()`: ! The number of supplied conditions (2) must equal the number of supplied values (1). # can't have empty inputs Code vec_case_when(list(), list()) Condition Error in `vec_case_when()`: ! At least one condition must be supplied. --- Code vec_case_when(list(), list(), default = 1) Condition Error in `vec_case_when()`: ! At least one condition must be supplied. # dots must be empty Code vec_case_when(list(TRUE), list(1), 2) Condition Error in `vec_case_when()`: ! `...` must be empty. x Problematic argument: * ..1 = 2 i Did you forget to name an argument? 
# `conditions` must be a list Code vec_case_when(1, list(2)) Condition Error in `vec_case_when()`: ! `conditions` must be a list, not the number 1. # `values` must be a list Code vec_case_when(list(TRUE), 1) Condition Error in `vec_case_when()`: ! `values` must be a list, not the number 1. # named inputs show up in the error message Code vec_case_when(list(x = 1.5), list(1)) Condition Error in `vec_case_when()`: ! `conditions$x` must be a logical vector, not the number 1.5. --- Code vec_case_when(list(x = 1.5), list(1), conditions_arg = "foo") Condition Error in `vec_case_when()`: ! `foo$x` must be a logical vector, not the number 1.5. --- Code vec_case_when(list(x = 1.5), list(1), conditions_arg = "") Condition Error in `vec_case_when()`: ! `x` must be a logical vector, not the number 1.5. --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2)) Condition Error in `vec_case_when()`: ! `conditions$x` must have size 2, not size 1. --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "foo") Condition Error in `vec_case_when()`: ! `foo$x` must have size 2, not size 1. --- Code vec_case_when(list(x = TRUE, y = c(TRUE, FALSE)), list(1, 2), conditions_arg = "") Condition Error in `vec_case_when()`: ! `x` must have size 2, not size 1. --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y")) Condition Error in `vec_case_when()`: ! Can't combine `values[[1]]` and `values$x` . --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "foo") Condition Error in `vec_case_when()`: ! Can't combine `foo[[1]]` and `foo$x` . --- Code vec_case_when(list(TRUE, FALSE), list(1, x = "y"), values_arg = "") Condition Error in `vec_case_when()`: ! Can't combine `..1` and `x` . --- Code vec_case_when(list(TRUE), list(NULL)) Condition Error in `vec_case_when()`: ! `values[[1]]` must be a vector, not `NULL`. --- Code vec_case_when(list(TRUE), list(x = NULL)) Condition Error in `vec_case_when()`: ! 
`values$x` must be a vector, not `NULL`. --- Code vec_case_when(list(TRUE), list(NULL), values_arg = "foo") Condition Error in `vec_case_when()`: ! `foo[[1]]` must be a vector, not `NULL`. --- Code vec_case_when(list(TRUE), list(x = NULL), values_arg = "foo") Condition Error in `vec_case_when()`: ! `foo$x` must be a vector, not `NULL`. --- Code vec_case_when(list(TRUE), list(NULL), values_arg = "") Condition Error in `vec_case_when()`: ! `..1` must be a vector, not `NULL`. --- Code vec_case_when(list(TRUE), list(x = NULL), values_arg = "") Condition Error in `vec_case_when()`: ! `x` must be a vector, not `NULL`. dplyr/tests/testthat/_snaps/filter.md0000644000176200001440000002021514416000521017471 0ustar liggesusers# filter() allows matrices with 1 column with a deprecation warning (#6091) Code out <- filter(df, matrix(c(TRUE, FALSE), nrow = 2)) Condition Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0. i Please use one dimensional logical vectors instead. --- Code out <- filter(gdf, matrix(c(TRUE, FALSE), nrow = 2)) Condition Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0. i Please use one dimensional logical vectors instead. # filter() disallows matrices with >1 column Code (expect_error(filter(df, matrix(TRUE, nrow = 3, ncol = 2)))) Output Error in `filter()`: i In argument: `matrix(TRUE, nrow = 3, ncol = 2)`. Caused by error: ! `..1` must be a logical vector, not a logical matrix. # filter() disallows arrays with >2 dimensions Code (expect_error(filter(df, array(TRUE, dim = c(3, 1, 1))))) Output Error in `filter()`: i In argument: `array(TRUE, dim = c(3, 1, 1))`. Caused by error: ! `..1` must be a logical vector, not a logical array. # filter() gives useful error messages Code (expect_error(iris %>% group_by(Species) %>% filter(1:n()))) Output Error in `filter()`: i In argument: `1:n()`. i In group 1: `Species = setosa`. Caused by error: ! `..1` must be a logical vector, not an integer vector. 
Code (expect_error(iris %>% filter(1:n()))) Output Error in `filter()`: i In argument: `1:n()`. Caused by error: ! `..1` must be a logical vector, not an integer vector. Code (expect_error(filter(data.frame(x = 1:2), matrix(c(TRUE, FALSE, TRUE, FALSE), nrow = 2)))) Output Error in `filter()`: i In argument: `matrix(c(TRUE, FALSE, TRUE, FALSE), nrow = 2)`. Caused by error: ! `..1` must be a logical vector, not a logical matrix. Code (expect_error(iris %>% group_by(Species) %>% filter(c(TRUE, FALSE)))) Output Error in `filter()`: i In argument: `c(TRUE, FALSE)`. i In group 1: `Species = setosa`. Caused by error: ! `..1` must be of size 50 or 1, not size 2. Code (expect_error(iris %>% rowwise(Species) %>% filter(c(TRUE, FALSE)))) Output Error in `filter()`: i In argument: `c(TRUE, FALSE)`. i In row 1. Caused by error: ! `..1` must be of size 1, not size 2. Code (expect_error(iris %>% filter(c(TRUE, FALSE)))) Output Error in `filter()`: i In argument: `c(TRUE, FALSE)`. Caused by error: ! `..1` must be of size 150 or 1, not size 2. Code (expect_error(iris %>% group_by(Species) %>% filter(data.frame(c(TRUE, FALSE)))) ) Output Error in `filter()`: i In argument: `data.frame(c(TRUE, FALSE))`. i In group 1: `Species = setosa`. Caused by error: ! `..1` must be of size 50 or 1, not size 2. Code (expect_error(iris %>% rowwise() %>% filter(data.frame(c(TRUE, FALSE))))) Output Error in `filter()`: i In argument: `data.frame(c(TRUE, FALSE))`. i In row 1. Caused by error: ! `..1` must be of size 1, not size 2. Code (expect_error(iris %>% filter(data.frame(c(TRUE, FALSE))))) Output Error in `filter()`: i In argument: `data.frame(c(TRUE, FALSE))`. Caused by error: ! `..1` must be of size 150 or 1, not size 2. Code (expect_error(tibble(x = 1) %>% filter(c(TRUE, TRUE)))) Output Error in `filter()`: i In argument: `c(TRUE, TRUE)`. Caused by error: ! `..1` must be of size 1, not size 2. 
Code (expect_error(iris %>% group_by(Species) %>% filter(data.frame(Sepal.Length > 3, 1:n())))) Condition Warning: Returning data frames from `filter()` expressions was deprecated in dplyr 1.0.8. i Please use `if_any()` or `if_all()` instead. Output Error in `filter()`: i In argument: `data.frame(Sepal.Length > 3, 1:n())`. i In group 1: `Species = setosa`. Caused by error: ! `..1$X1.n..` must be a logical vector, not an integer vector. Code (expect_error(iris %>% filter(data.frame(Sepal.Length > 3, 1:n())))) Condition Warning: Returning data frames from `filter()` expressions was deprecated in dplyr 1.0.8. i Please use `if_any()` or `if_all()` instead. Output Error in `filter()`: i In argument: `data.frame(Sepal.Length > 3, 1:n())`. Caused by error: ! `..1$X1.n..` must be a logical vector, not an integer vector. Code (expect_error(mtcars %>% filter(`_x`))) Output Error in `filter()`: i In argument: `_x`. Caused by error: ! object '_x' not found Code (expect_error(mtcars %>% group_by(cyl) %>% filter(`_x`))) Output Error in `filter()`: i In argument: `_x`. i In group 1: `cyl = 4`. Caused by error: ! object '_x' not found Code (expect_error(filter(mtcars, x = 1))) Output Error in `filter()`: ! We detected a named input. i This usually means that you've used `=` instead of `==`. i Did you mean `x == 1`? Code (expect_error(filter(mtcars, y > 2, z = 3))) Output Error in `filter()`: ! We detected a named input. i This usually means that you've used `=` instead of `==`. i Did you mean `z == 3`? Code (expect_error(filter(mtcars, TRUE, x = 1))) Output Error in `filter()`: ! We detected a named input. i This usually means that you've used `=` instead of `==`. i Did you mean `x == 1`? Code (expect_error(filter(ts(1:10)))) Output Error in `filter()`: ! Incompatible data source. x `.data` is a object, not a data source. i Did you want to use `stats::filter()`? Code (expect_error(tibble() %>% filter(stop("{")))) Output Error in `filter()`: i In argument: `stop("{")`. 
Caused by error: ! { Code data.frame(x = 1, y = 1) %>% filter(across(everything(), ~ .x > 0)) Condition Warning: Using `across()` in `filter()` was deprecated in dplyr 1.0.8. i Please use `if_any()` or `if_all()` instead. Output x y 1 1 1 Code data.frame(x = 1, y = 1) %>% filter(data.frame(x > 0, y > 0)) Condition Warning: Returning data frames from `filter()` expressions was deprecated in dplyr 1.0.8. i Please use `if_any()` or `if_all()` instead. Output x y 1 1 1 # `filter()` doesn't allow data frames with missing or empty names (#6758) Code filter(df1) Condition Error in `filter()`: ! Can't transform a data frame with `NA` or `""` names. --- Code filter(df2) Condition Error in `filter()`: ! Can't transform a data frame with `NA` or `""` names. # can't use `.by` with `.preserve` Code filter(df, .by = x, .preserve = TRUE) Condition Error in `filter()`: ! Can't supply both `.by` and `.preserve`. # catches `.by` with grouped-df Code filter(gdf, .by = x) Condition Error in `filter()`: ! Can't supply `.by` when `.data` is a grouped data frame. # catches `.by` with rowwise-df Code filter(rdf, .by = x) Condition Error in `filter()`: ! Can't supply `.by` when `.data` is a rowwise data frame. # catches `by` typo (#6647) Code filter(df, by = x) Condition Error in `filter()`: ! Can't specify an argument named `by` in this verb. i Did you mean to use `.by` instead? dplyr/tests/testthat/_snaps/join-cols.md0000644000176200001440000000652014416000525020110 0ustar liggesusers# can't mix non-equi conditions with `keep = FALSE` (#6499) Code join_cols(c("x", "y"), c("x", "z"), by = join_by(x, y > z), keep = FALSE) Condition Error: ! Can't set `keep = FALSE` when using an inequality, rolling, or overlap join. --- Code join_cols(c("xl", "xu"), c("yl", "yu"), by = join_by(xl >= yl, xu < yu), keep = FALSE) Condition Error: ! Can't set `keep = FALSE` when using an inequality, rolling, or overlap join. 
--- Code join_cols("x", c("yl", "yu"), by = join_by(between(x, yl, yu)), keep = FALSE) Condition Error: ! Can't set `keep = FALSE` when using an inequality, rolling, or overlap join. --- Code join_cols(c("xl", "xu"), c("yl", "yu"), by = join_by(overlaps(xl, xu, yl, yu)), keep = FALSE) Condition Error: ! Can't set `keep = FALSE` when using an inequality, rolling, or overlap join. # can't duplicate key between equi condition and non-equi condition Code join_cols("x", c("xl", "xu"), by = join_by(x > xl, x == xu)) Condition Error: ! Join columns in `x` must be unique. x Problem with `x`. --- Code join_cols(c("xl", "xu"), "x", by = join_by(xl < x, xu == x)) Condition Error: ! Join columns in `y` must be unique. x Problem with `x`. # emits useful messages Code join_cols(c("x", "y"), c("y", "y"), join_by(y)) Condition Error: ! Input columns in `y` must be unique. x Problem with `y`. --- Code join_cols(c("y", "y"), c("x", "y"), join_by(y)) Condition Error: ! Input columns in `x` must be unique. x Problem with `y`. --- Code join_cols(xy, xy, by = as_join_by(list("1", y = "2"))) Condition Error in `as_join_by()`: ! `by$x` must evaluate to a character vector. --- Code join_cols(xy, xy, by = as_join_by(list(x = "1", "2"))) Condition Error in `as_join_by()`: ! `by$y` must evaluate to a character vector. --- Code join_cols(xy, xy, by = as_join_by(c("x", NA))) Condition Error: ! Join columns in `x` can't be `NA`. x Problem at position 2. --- Code join_cols(xy, xy, by = as_join_by(c("aaa", "bbb"))) Condition Error: ! Join columns in `x` must be present in the data. x Problem with `aaa` and `bbb`. --- Code join_cols(xy, xy, by = as_join_by(c("x", "x", "x"))) Condition Error: ! Join columns in `x` must be unique. x Problem with `x`. --- Code join_cols(xyz, xyz, by = join_by(x, x > y, z)) Condition Error: ! Join columns in `x` must be unique. x Problem with `x`. --- Code join_cols(xy, xy, by = join_by(x), suffix = "x") Condition Error: ! 
`suffix` must be a character vector of length 2, not the string "x" of length 1. --- Code join_cols(xy, xy, by = join_by(x), suffix = c("", NA)) Condition Error: ! `suffix` can't be `NA`. # references original column in `y` when there are type errors (#6465) Code (expect_error(join_cast_common(x_key, y_key, vars))) Output Error: ! Can't join `x$a` with `y$b` due to incompatible types. i `x$a` is a . i `y$b` is a . dplyr/tests/testthat/_snaps/pick.md0000644000176200001440000001320214416000534017134 0ustar liggesusers# with `rowwise()` data, leaves list-cols unwrapped (#5951, #6264) Code mutate(rdf, z = pick(x, y)) Condition Error in `mutate()`: i In argument: `z = pick(x, y)`. i In row 2. Caused by error: ! `z` must be size 1, not 2. i Did you mean: `z = list(pick(x, y))` ? --- Code mutate(rdf, z = pick_wrapper(x, y)) Condition Error in `mutate()`: i In argument: `z = pick_wrapper(x, y)`. i In row 2. Caused by error: ! `z` must be size 1, not 2. i Did you mean: `z = list(pick_wrapper(x, y))` ? # can't explicitly select grouping columns (#5460) Code mutate(gdf, y = pick(g)) Condition Error in `mutate()`: i In argument: `y = pick(g)`. Caused by error in `pick()`: ! Can't subset columns that don't exist. x Column `g` doesn't exist. --- Code mutate(gdf, y = pick_wrapper(g)) Condition Error in `mutate()`: i In argument: `y = pick_wrapper(g)`. i In group 1: `g = 1`. Caused by error in `pick()`: ! Can't subset columns that don't exist. x Column `g` doesn't exist. # must supply at least one selector to `pick()` Code mutate(df, y = pick()) Condition Error in `mutate()`: i In argument: `y = pick()`. Caused by error in `pick()`: ! Must supply at least one input to `pick()`. --- Code mutate(df, y = pick_wrapper()) Condition Error in `mutate()`: i In argument: `y = pick_wrapper()`. Caused by error in `pick()`: ! Must supply at least one input to `pick()`. 
# the tidyselection and column extraction are evaluated on the current data Code mutate(gdf, x = NULL, y = pick(x)) Condition Error in `mutate()`: i In argument: `y = pick(x)`. Caused by error in `pick()`: ! Can't subset columns that don't exist. x Column `x` doesn't exist. --- Code mutate(gdf, x = NULL, y = pick_wrapper(x)) Condition Error in `mutate()`: i In argument: `y = pick_wrapper(x)`. i In group 1: `g = 1`. Caused by error in `pick()`: ! Can't subset columns that don't exist. x Column `x` doesn't exist. # can call `pick()` from a user defined function Code mutate(gdf, d = my_pick()) Condition Error in `mutate()`: i In argument: `d = my_pick()`. i In group 1: `a = 1`. Caused by error in `all_of()`: ! Can't subset columns that don't exist. x Column `a` doesn't exist. --- Code mutate(gdf, d = my_pick(y)) Condition Error in `mutate()`: i In argument: `d = my_pick(y)`. i In group 1: `a = 1`. Caused by error in `all_of()`: ! Can't subset columns that don't exist. x Column `a` doesn't exist. # errors correctly outside mutate context Code pick() Condition Error in `pick()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. --- Code pick(a, b) Condition Error in `pick()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. # when expansion occurs, error labels use the pre-expansion quosure Code mutate(df, if (cur_group_id() == 1L) pick(x) else "x", .by = g) Condition Error in `mutate()`: i In argument: `if (cur_group_id() == 1L) pick(x) else "x"`. Caused by error: ! `if (cur_group_id() == 1L) pick(x) else "x"` must return compatible vectors across groups. i Result of type > for group 1: `g = 1`. i Result of type for group 2: `g = 2`. # doesn't allow renaming Code mutate(data.frame(x = 1), pick(y = x)) Condition Error in `mutate()`: i In argument: `pick(y = x)`. Caused by error in `pick()`: ! Can't rename variables in this context. 
--- Code mutate(data.frame(x = 1), pick_wrapper(y = x)) Condition Error in `mutate()`: i In argument: `pick_wrapper(y = x)`. Caused by error in `pick()`: ! Can't rename variables in this context. # `pick()` errors in `arrange()` are useful Code arrange(df, pick(y)) Condition Error in `arrange()`: i In argument: `..1 = pick(y)`. Caused by error in `pick()`: ! Can't subset columns that don't exist. x Column `y` doesn't exist. --- Code arrange(df, foo(pick(x))) Condition Error in `arrange()`: i In argument: `..1 = foo(pick(x))`. Caused by error in `foo()`: ! could not find function "foo" # `filter()` with `pick()` that uses invalid tidy-selection errors Code filter(df, pick(x, a)) Condition Error in `filter()`: i In argument: `pick(x, a)`. Caused by error in `pick()`: ! Can't subset columns that don't exist. x Column `a` doesn't exist. --- Code filter(df, pick_wrapper(x, a)) Condition Error in `filter()`: i In argument: `pick_wrapper(x, a)`. Caused by error in `pick()`: ! Can't subset columns that don't exist. x Column `a` doesn't exist. # `filter()` that doesn't use `pick()` result correctly errors Code filter(df, pick(x, y)$x) Condition Error in `filter()`: i In argument: `asNamespace("dplyr")$dplyr_pick_tibble(x = x, y = y)$x`. Caused by error: ! `..1` must be a logical vector, not a double vector. --- Code filter(df, pick_wrapper(x, y)$x) Condition Error in `filter()`: i In argument: `pick_wrapper(x, y)$x`. Caused by error: ! `..1` must be a logical vector, not a double vector. dplyr/tests/testthat/_snaps/distinct.md0000644000176200001440000000165414416000517020040 0ustar liggesusers# distinct errors when selecting an unknown column (#3140) Code df <- tibble(g = c(1, 2), x = c(1, 2)) (expect_error(df %>% distinct(aa, x))) Output Error in `distinct()`: ! Must use existing variables. x `aa` not found in `.data`. Code (expect_error(df %>% distinct(aa, bb))) Output Error in `distinct()`: ! Must use existing variables. x `aa` not found in `.data`. 
x `bb` not found in `.data`. Code (expect_error(df %>% distinct(.data$aa))) Output Error in `distinct()`: ! Must use existing variables. x `aa` not found in `.data`. Code (expect_error(df %>% distinct(y = a + 1))) Output Error in `distinct()`: i In argument: `y = a + 1`. Caused by error: ! object 'a' not found dplyr/tests/testthat/_snaps/rank.md0000644000176200001440000000103614416000534017143 0ustar liggesusers# ntile() validates `n` Code ntile(1, n = 1.5) Condition Error in `ntile()`: ! `n` must be a whole number, not the number 1.5. --- Code ntile(1, n = c(1, 2)) Condition Error in `ntile()`: ! `n` must be a whole number, not a double vector. --- Code ntile(1, n = NA_real_) Condition Error in `ntile()`: ! `n` must be a whole number, not a numeric `NA`. --- Code ntile(1, n = 0) Condition Error in `ntile()`: ! `n` must be positive. dplyr/tests/testthat/_snaps/all-equal.md0000644000176200001440000000635314416000506020073 0ustar liggesusers# all_equal is deprecated Code all_equal(mtcars, mtcars) Condition Warning: `all_equal()` was deprecated in dplyr 1.1.0. i Please use `all.equal()` instead. i And manually order the rows/cols as needed Output [1] TRUE # data frames not equal if missing row Code all_equal(mtcars, mtcars[-1, ]) Output [1] "Different number of rows." Code all_equal(iris, iris[-1, ]) Output [1] "Different number of rows." Code all_equal(df_all, df_all[-1, ]) Output [1] "Different number of rows." # data frames not equal if missing col Code all_equal(mtcars, mtcars[, -1]) Output Different number of columns: 11 vs 10. Code all_equal(iris, iris[, -1]) Output Different number of columns: 5 vs 4. Code all_equal(df_all, df_all[, -1]) Output Different number of columns: 7 vs 6. # factors equal only if levels equal Code all_equal(df1, df2) Output Different types for column `x`: factor<38051> vs factor. Code all_equal(df2, df1) Output Different types for column `x`: factor vs factor<38051>. 
# factor comparison requires strict equality of levels (#2440) Code all_equal(df1, df2) Output Different types for column `x`: factor<4d52a> vs factor<38051>. Code all_equal(df2, df1) Output Different types for column `x`: factor<38051> vs factor<4d52a>. # equality test fails when convert is FALSE and types don't match (#1484) Code all_equal(df1, df2, convert = FALSE) Output Different types for column `x`: character vs factor<4d52a>. # equality returns a message for convert = TRUE Code all_equal(df1, df2) Output Different types for column `x`: integer vs character. Code all_equal(df1, df2, convert = TRUE) Output Incompatible types for column `x`: integer vs character. # numeric and integer can be compared if convert = TRUE Code all_equal(df1, df2) Output Different types for column `x`: integer vs double. # returns vector for more than one difference (#1819) Code all_equal(tibble(a = 1, b = 2), tibble(a = 1L, b = 2L)) Output Different types for column `a`: double vs integer. Different types for column `b`: double vs integer. # ignore column order Code all_equal(tibble(a = 1, b = 2), tibble(b = 2, a = 1), ignore_col_order = FALSE) Output Same column names, but different order. Code all_equal(tibble(a = 1, b = 2), tibble(a = 1), ignore_col_order = FALSE) Output Different number of columns: 2 vs 1. # count() give meaningful errors Code (expect_error(union(tibble(a = 1), tibble(a = "1")))) Output Error in `union()`: ! `x` and `y` are not compatible. x Incompatible types for column `a`: double vs character. Code (expect_error(union(tibble(a = 1, b = 2), tibble(a = "1", b = "2")))) Output Error in `union()`: ! `x` and `y` are not compatible. x Incompatible types for column `a`: double vs character. x Incompatible types for column `b`: double vs character. 
dplyr/tests/testthat/_snaps/arrange.md0000644000176200001440000000342314416000506017630 0ustar liggesusers# arrange() gives meaningful errors Code (expect_error(tibble(x = 1, x = 1, .name_repair = "minimal") %>% arrange(x))) Output Error in `arrange()`: ! Can't transform a data frame with duplicate names. Code (expect_error(tibble(x = 1) %>% arrange(y))) Output Error in `arrange()`: i In argument: `..1 = y`. Caused by error: ! object 'y' not found Code (expect_error(tibble(x = 1) %>% arrange(rep(x, 2)))) Output Error in `arrange()`: i In argument: `..1 = rep(x, 2)`. Caused by error: ! `..1` must be size 1, not 2. # arrange errors if stringi is not installed and a locale identifier is used Code locale_to_chr_proxy_collate("fr", has_stringi = FALSE) Condition Error: ! stringi >=1.5.3 is required to arrange in a different locale. # arrange validates `.locale` Code arrange(df, .locale = 1) Condition Error in `arrange()`: ! `.locale` must be a string or `NULL`. --- Code arrange(df, .locale = c("en_US", "fr_BF")) Condition Error in `arrange()`: ! If `.locale` is a character vector, it must be a single string. # arrange validates that `.locale` must be one from stringi Code arrange(df, .locale = "x") Condition Error in `arrange()`: ! `.locale` must be one of the locales within `stringi::stri_locale_list()`. # desc() inside arrange() checks the number of arguments (#5921) Code df <- data.frame(x = 1, y = 2) (expect_error(arrange(df, desc(x, y)))) Output Error in `arrange()`: ! `desc()` must be called with exactly one argument. dplyr/tests/testthat/_snaps/deprec-funs.md0000644000176200001440000000167414416000516020433 0ustar liggesusers# funs() is deprecated Code funs(fn = bar) Condition Warning: `funs()` was deprecated in dplyr 0.8.0. 
i Please use a list of either functions or lambdas: # Simple named list: list(mean = mean, median = median) # Auto named with `tibble::lst()`: tibble::lst(mean, median) # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE)) Output $ fn: bar(.) # funs() give meaningful error messages Code (expect_error(funs(function(si) { mp[si] }))) Output Error in `funs()`: ! `function(si) { mp[si] }` must be a function name (quoted or unquoted) or an unquoted call, not `function`. Code (expect_error(funs(~ mp[.]))) Output Error in `funs()`: ! `~mp[.]` must be a function name (quoted or unquoted) or an unquoted call, not `~`. dplyr/tests/testthat/_snaps/consecutive-id.md0000644000176200001440000000103314416000514021124 0ustar liggesusers# follows recycling rules Code consecutive_id(1:3, 1:4) Condition Error in `consecutive_id()`: ! Can't recycle `..1` (size 3) to match `..2` (size 4). # generates useful errors Code consecutive_id(x = 1:4) Condition Error in `consecutive_id()`: ! Arguments in `...` must be passed by position, not name. x Problematic argument: * x = 1:4 Code consecutive_id(mean) Condition Error in `consecutive_id()`: ! `..1` must be a vector, not a function. dplyr/tests/testthat/_snaps/group-by.md0000644000176200001440000000274614416000522017762 0ustar liggesusers# can't rename while partially `ungroup()`-ing (#6606) Code ungroup(gdf, g2 = g) Condition Error in `ungroup()`: ! Can't rename variables in this context. # select(group_by(.)) implicitly adds grouping variables (#170) Code res <- mtcars %>% group_by(vs) %>% select(mpg) Message Adding missing grouping variables: `vs` # group_by works with zero-row data frames (#486) Code x <- select(dfg, a) Message Adding missing grouping variables: `g` # group_by() and ungroup() give meaningful error messages Code df <- tibble(x = 1, y = 2) (expect_error(df %>% group_by(unknown))) Output Error in `group_by()`: ! Must group by variables found in `.data`. x Column `unknown` is not found. 
Code (expect_error(df %>% ungroup(x))) Output Error in `ungroup()`: ! `...` must be empty. x Problematic argument: * ..1 = x i Did you forget to name an argument? Code (expect_error(df %>% group_by(x, y) %>% ungroup(z))) Output Error in `ungroup()`: ! Can't subset columns that don't exist. x Column `z` doesn't exist. Code (expect_error(df %>% group_by(z = a + 1))) Output Error in `group_by()`: i In argument: `z = a + 1`. Caused by error: ! object 'a' not found dplyr/tests/testthat/_snaps/sets.md0000644000176200001440000000653614472225345017213 0ustar liggesusers# extra arguments in ... error (#5891) Code intersect(df1, df2, z = 3) Condition Error in `intersect()`: ! `...` must be empty. x Problematic argument: * z = 3 Code union(df1, df2, z = 3) Condition Error in `union()`: ! `...` must be empty. x Problematic argument: * z = 3 Code union_all(df1, df2, z = 3) Condition Error in `union_all()`: ! `...` must be empty. x Problematic argument: * z = 3 Code setdiff(df1, df2, z = 3) Condition Error in `setdiff()`: ! `...` must be empty. x Problematic argument: * z = 3 Code symdiff(df1, df2, z = 3) Condition Error in `symdiff()`: ! `...` must be empty. x Problematic argument: * z = 3 # incompatible data frames error (#903) Code intersect(df1, df2) Condition Error in `intersect()`: ! `x` and `y` are not compatible. x Different number of columns: 1 vs 2. Code union(df1, df2) Condition Error in `union()`: ! `x` and `y` are not compatible. x Different number of columns: 1 vs 2. Code union_all(df1, df2) Condition Error in `union_all()`: ! `x` and `y` are not compatible. x Different number of columns: 1 vs 2. Code setdiff(df1, df2) Condition Error in `setdiff()`: ! `x` and `y` are not compatible. x Different number of columns: 1 vs 2. Code symdiff(df1, df2) Condition Error in `symdiff()`: ! `x` and `y` are not compatible. x Different number of columns: 1 vs 2. 
# is_compatible generates useful messages for different cases Code cat(is_compatible(tibble(x = 1), 1)) Output `y` must be a data frame. Code cat(is_compatible(tibble(x = 1), tibble(x = 1, y = 2))) Output Different number of columns: 1 vs 2. Code cat(is_compatible(tibble(x = 1, y = 1), tibble(y = 1, x = 1), ignore_col_order = FALSE)) Output Same column names, but different order. Code cat(is_compatible(tibble(x = 1), tibble(y = 1))) Output Cols in `y` but not `x`: `y`. Cols in `x` but not `y`: `x`. Code cat(is_compatible(tibble(x = 1), tibble(x = 1L), convert = FALSE)) Output Different types for column `x`: double vs integer. Code cat(is_compatible(tibble(x = 1), tibble(x = "a"))) Output Incompatible types for column `x`: double vs character. # setequal tibbles must have same rows and columns Code setequal(tibble(x = 1:2), tibble(y = 1:2)) Condition Error in `setequal()`: ! `x` and `y` are not compatible. x Cols in `y` but not `x`: `y`. x Cols in `x` but not `y`: `x`. --- Code setequal(tibble(x = 1:2), tibble(x = c("a", "b"))) Condition Error in `setequal()`: ! `x` and `y` are not compatible. x Incompatible types for column `x`: integer vs character. # setequal checks y is a data frame Code setequal(mtcars, 1) Condition Error in `setequal()`: ! `x` and `y` are not compatible. `y` must be a data frame. # setequal checks for extra arguments Code setequal(mtcars, mtcars, z = 2) Condition Error in `setequal()`: ! `...` must be empty. x Problematic argument: * z = 2 dplyr/tests/testthat/_snaps/deprec-dbi.md0000644000176200001440000000034614416000516020211 0ustar liggesusers# src_sqlite() gives meaningful error messages Code (expect_error(src_sqlite(":memory:"))) Output Error in `src_sqlite()`: ! `path` must already exist, unless `create` = TRUE. dplyr/tests/testthat/_snaps/if-else.md0000644000176200001440000000334014416000523017532 0ustar liggesusers# takes the common type of `true` and `false` (#6243) Code if_else(TRUE, 1, "x") Condition Error in `if_else()`: ! 
Can't combine `true` and `false` . # includes `missing` in the common type computation if used Code if_else(TRUE, 1, 2, missing = "x") Condition Error in `if_else()`: ! Can't combine `true` and `missing` . # `condition` must be logical (and isn't cast to logical!) Code if_else(1:10, 1, 2) Condition Error in `if_else()`: ! `condition` must be a logical vector, not an integer vector. # `true`, `false`, and `missing` must recycle to the size of `condition` Code if_else(x < 2, bad, x) Condition Error in `if_else()`: ! `true` must have size 3, not size 2. --- Code if_else(x < 2, x, bad) Condition Error in `if_else()`: ! `false` must have size 3, not size 2. --- Code if_else(x < 2, x, x, missing = bad) Condition Error in `if_else()`: ! `missing` must have size 3, not size 2. # must have empty dots Code if_else(TRUE, 1, 2, missing = 3, 4) Condition Error in `if_else()`: ! `...` must be empty. x Problematic argument: * ..1 = 4 i Did you forget to name an argument? # `ptype` overrides the common type Code if_else(TRUE, 1L, 2.5, ptype = integer()) Condition Error in `if_else()`: ! Can't convert from `false` to due to loss of precision. * Locations: 1 # `size` overrides the `condition` size Code if_else(TRUE, 1, 2, size = 2) Condition Error in `if_else()`: ! `condition` must have size 2, not size 1. dplyr/tests/testthat/_snaps/conditions.md0000644000176200001440000004005214416000514020360 0ustar liggesusers# can pass verb-level error call Code mutate(mtcars, 1 + "") Condition Error in `foo()`: i In argument: `1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code transmute(mtcars, 1 + "") Condition Error in `foo()`: i In argument: `1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code summarise(mtcars, 1 + "") Condition Error in `foo()`: i In argument: `1 + ""`. Caused by error in `1 + ""`: ! 
non-numeric argument to binary operator Code summarise(group_by(mtcars, cyl), 1 + "") Condition Error in `foo()`: i In argument: `1 + ""`. i In group 1: `cyl = 4`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code filter(mtcars, 1 + "") Condition Error in `foo()`: i In argument: `1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code arrange(mtcars, 1 + "") Condition Error in `foo()`: i In argument: `..1 = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code select(mtcars, 1 + "") Condition Error in `foo()`: ! Problem while evaluating `1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code slice(mtcars, 1 + "") Condition Error in `foo()`: i In argument: `1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator # can pass verb-level error call (example case) Code my_verb(mtcars, 1 + "", am) Condition Error in `my_verb()`: i In argument: `.result = (1 + "") * am`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code my_verb(mtcars, cyl, c(am, vs)) Condition Error in `my_verb()`: i In argument: `.result = cyl * c(am, vs)`. Caused by error: ! `.result` must be size 32 or 1, not 64. # `err_locs()` works as expected Code err_locs(1.5) Condition Error in `err_locs()`: ! `x` must be an integer vector of locations. i This is an internal error that was detected in the dplyr package. Please report it at with a reprex () and the full backtrace. --- Code err_locs(integer()) Condition Error in `err_locs()`: ! `x` must have at least 1 location. i This is an internal error that was detected in the dplyr package. Please report it at with a reprex () and the full backtrace. 
--- Code err_locs(1L) Output `c(1)` Code err_locs(1:5) Output `c(1, 2, 3, 4, 5)` Code err_locs(1:6) Output `c(1, 2, 3, 4, 5)` and 1 more Code err_locs(1:7) Output `c(1, 2, 3, 4, 5)` and 2 more # errors during dots collection are not enriched (#6178) Code mutate(mtcars, !!foobarbaz()) Condition Error in `foobarbaz()`: ! could not find function "foobarbaz" Code transmute(mtcars, !!foobarbaz()) Condition Error in `foobarbaz()`: ! could not find function "foobarbaz" Code select(mtcars, !!foobarbaz()) Condition Error in `foobarbaz()`: ! could not find function "foobarbaz" Code arrange(mtcars, !!foobarbaz()) Condition Error in `foobarbaz()`: ! could not find function "foobarbaz" Code filter(mtcars, !!foobarbaz()) Condition Error in `foobarbaz()`: ! could not find function "foobarbaz" # warnings are collected for `last_dplyr_warnings()` Code # Ungrouped df %>% mutate(x = f()) %>% invisible() Condition Warning: There was 1 warning in `mutate()`. i In argument: `x = f()`. Caused by warning in `f()`: ! msg Code last_dplyr_warnings() Output [[1]] Warning in `mutate()`: i In argument: `x = f()`. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-df %>% mutate(x = f()) %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) --- Code # Grouped df %>% group_by(id) %>% mutate(x = f()) %>% invisible() Condition Warning: There were 2 warnings in `mutate()`. The first warning was: i In argument: `x = f()`. i In group 1: `id = 1`. Caused by warning in `f()`: ! msg i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. Code last_dplyr_warnings() Output [[1]] Warning in `mutate()`: i In argument: `x = f()`. i In group 1: `id = 1`. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-df %>% group_by(id) %>% mutate(x = f()) %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) [[2]] Warning in `mutate()`: i In argument: `x = f()`. i In group 2: `id = 2`. Caused by warning in `f()`: ! 
msg --- Backtrace: x 1. +-df %>% group_by(id) %>% mutate(x = f()) %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) --- Code # Rowwise df %>% rowwise() %>% mutate(x = f()) %>% invisible() Condition Warning: There were 2 warnings in `mutate()`. The first warning was: i In argument: `x = f()`. i In row 1. Caused by warning in `f()`: ! msg i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. Code last_dplyr_warnings() Output [[1]] Warning in `mutate()`: i In argument: `x = f()`. i In row 1. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-df %>% rowwise() %>% mutate(x = f()) %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) [[2]] Warning in `mutate()`: i In argument: `x = f()`. i In row 2. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-df %>% rowwise() %>% mutate(x = f()) %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) --- Code # Multiple type of warnings within multiple verbs df %>% group_by(g = f():n()) %>% rowwise() %>% mutate(x = f()) %>% group_by(id) %>% mutate(x = f()) %>% invisible() Condition Warning: There was 1 warning in `group_by()`. i In argument: `g = f():n()`. Caused by warning in `f()`: ! msg Warning: There were 2 warnings in `mutate()`. The first warning was: i In argument: `x = f()`. i In row 1. Caused by warning in `f()`: ! msg i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. Warning: There were 2 warnings in `mutate()`. The first warning was: i In argument: `x = f()`. i In group 1: `id = 1`. Caused by warning in `f()`: ! msg i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. Code last_dplyr_warnings() Output [[1]] Warning in `group_by()`: i In argument: `g = f():n()`. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-... %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. +-dplyr::group_by(., id) 4. +-dplyr::mutate(., x = f()) 5. +-dplyr::rowwise(.) 6. 
+-dplyr::group_by(., g = f():n()) 7. \-dplyr:::group_by.data.frame(., g = f():n()) [[2]] Warning in `mutate()`: i In argument: `x = f()`. i In row 1. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-... %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. +-dplyr::group_by(., id) 4. +-dplyr::mutate(., x = f()) 5. \-dplyr:::mutate.data.frame(., x = f()) [[3]] Warning in `mutate()`: i In argument: `x = f()`. i In row 2. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-... %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. +-dplyr::group_by(., id) 4. +-dplyr::mutate(., x = f()) 5. \-dplyr:::mutate.data.frame(., x = f()) [[4]] Warning in `mutate()`: i In argument: `x = f()`. i In group 1: `id = 1`. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-... %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) [[5]] Warning in `mutate()`: i In argument: `x = f()`. i In group 2: `id = 2`. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-... %>% invisible() 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) --- Code # Truncated (1 more) df %>% rowwise() %>% mutate(x = f()) Condition Warning: There were 2 warnings in `mutate()`. The first warning was: i In argument: `x = f()`. i In row 1. Caused by warning in `f()`: ! msg i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. Output # A tibble: 2 x 2 # Rowwise: id x 1 1 1 2 2 1 Code last_dplyr_warnings(n = 1) Output [[1]] Warning in `mutate()`: i In argument: `x = f()`. i In row 1. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-df %>% rowwise() %>% mutate(x = f()) 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) Message ... with 1 more warning. i Run `dplyr::last_dplyr_warnings(n = 2)` to show more. --- Code # Truncated (several more) df <- tibble(id = 1:5) df %>% rowwise() %>% mutate(x = f()) Condition Warning: There were 5 warnings in `mutate()`. The first warning was: i In argument: `x = f()`. 
i In row 1. Caused by warning in `f()`: ! msg i Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings. Output # A tibble: 5 x 2 # Rowwise: id x 1 1 1 2 2 1 3 3 1 4 4 1 5 5 1 Code last_dplyr_warnings(n = 1) Output [[1]] Warning in `mutate()`: i In argument: `x = f()`. i In row 1. Caused by warning in `f()`: ! msg --- Backtrace: x 1. +-df %>% rowwise() %>% mutate(x = f()) 2. +-dplyr::mutate(., x = f()) 3. \-dplyr:::mutate.data.frame(., x = f()) Message ... with 4 more warnings. i Run `dplyr::last_dplyr_warnings(n = 2)` to show more. # complex backtraces with base and rlang warnings Code foo() Condition Warning: There was 1 warning in `group_by()`. i In argument: `x = f(1):n()`. Caused by warning in `h()`: ! foo Warning: There were 3 warnings in `mutate()`. The first warning was: i In argument: `x = f(1, base = FALSE)`. i In group 1: `x = 1`. Caused by warning: ! foo i Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings. Output # A tibble: 3 x 2 # Groups: x [1] id x 1 1 1 2 2 1 3 3 1 Code last_dplyr_warnings() Output [[1]] Warning in `group_by()`: i In argument: `x = f(1):n()`. Caused by warning in `h()`: ! foo --- Backtrace: x 1. +-dplyr (local) foo() 2. | \-dplyr (local) bar() 3. | \-df %>% group_by(x = f(1):n()) %>% mutate(x = f(1, base = FALSE)) 4. +-dplyr::mutate(., x = f(1, base = FALSE)) 5. +-dplyr::group_by(., x = f(1):n()) 6. \-dplyr:::group_by.data.frame(., x = f(1):n()) [[2]] Warning in `mutate()`: i In argument: `x = f(1, base = FALSE)`. i In group 1: `x = 1`. Caused by warning: ! foo --- Backtrace: x 1. +-dplyr (local) foo() 2. | \-dplyr (local) bar() 3. | \-df %>% group_by(x = f(1):n()) %>% mutate(x = f(1, base = FALSE)) 4. +-dplyr::mutate(., x = f(1, base = FALSE)) 5. \-dplyr:::mutate.data.frame(., x = f(1, base = FALSE)) [[3]] Warning in `mutate()`: i In argument: `x = f(1, base = FALSE)`. i In group 2: `x = 2`. Caused by warning: ! foo --- Backtrace: x 1. +-dplyr (local) foo() 2. | \-dplyr (local) bar() 3. 
| \-df %>% group_by(x = f(1):n()) %>% mutate(x = f(1, base = FALSE)) 4. +-dplyr::mutate(., x = f(1, base = FALSE)) 5. \-dplyr:::mutate.data.frame(., x = f(1, base = FALSE)) [[4]] Warning in `mutate()`: i In argument: `x = f(1, base = FALSE)`. i In group 3: `x = 3`. Caused by warning: ! foo --- Backtrace: x 1. +-dplyr (local) foo() 2. | \-dplyr (local) bar() 3. | \-df %>% group_by(x = f(1):n()) %>% mutate(x = f(1, base = FALSE)) 4. +-dplyr::mutate(., x = f(1, base = FALSE)) 5. \-dplyr:::mutate.data.frame(., x = f(1, base = FALSE)) # can collect warnings in main verbs Code invisible(mtcars %>% rowwise() %>% filter(f()) %>% arrange(f()) %>% mutate(a = f()) %>% summarise(b = f())) Condition Warning: There were 32 warnings in `filter()`. The first warning was: i In argument: `f()`. i In row 1. Caused by warning in `f()`: ! foo i Run `dplyr::last_dplyr_warnings()` to see the 31 remaining warnings. Warning: There was 1 warning in `arrange()`. i In argument: `..1 = f()`. Caused by warning in `f()`: ! foo Warning: There were 32 warnings in `mutate()`. The first warning was: i In argument: `a = f()`. i In row 1. Caused by warning in `f()`: ! foo i Run `dplyr::last_dplyr_warnings()` to see the 31 remaining warnings. Warning: There were 32 warnings in `summarise()`. The first warning was: i In argument: `b = f()`. i In row 1. Caused by warning in `f()`: ! foo i Run `dplyr::last_dplyr_warnings()` to see the 31 remaining warnings. Code warnings <- last_dplyr_warnings(Inf) warnings[[1]] Output Warning in `filter()`: i In argument: `f()`. i In row 1. Caused by warning in `f()`: ! foo Code warnings[[33]] Output Warning in `arrange()`: i In argument: `..1 = f()`. Caused by warning in `f()`: ! foo Code warnings[[65]] Output Warning in `mutate()`: i In argument: `a = f()`. i In row 32. Caused by warning in `f()`: ! foo Code warnings[[97]] Output Warning in `summarise()`: i In argument: `b = f()`. i In row 32. Caused by warning in `f()`: ! 
foo dplyr/tests/testthat/_snaps/colwise-mutate.md0000644000176200001440000000405214416000511021146 0ustar liggesusers# selection works with grouped data frames (#2624) Code out <- mutate_if(gdf, is.factor, as.character) Message `mutate_if()` ignored the following grouping variables: * Column `Species` # colwise verbs deprecate quosures (#4330) Code (expect_warning(mutate_at(mtcars, vars(mpg), quo(mean(.))))) Output Warning: The `...` argument of `mutate_at()` can't contain quosures as of dplyr 0.8.3. i Please use a one-sided formula, a function, or a function name. Code (expect_warning(summarise_at(mtcars, vars(mpg), quo(mean(.))))) Output Warning: The `...` argument of `summarise_at()` can't contain quosures as of dplyr 0.8.3. i Please use a one-sided formula, a function, or a function name. # colwise mutate gives meaningful error messages Code (expect_error(mutate_at(tibble(), "test", ~1))) Output Error in `tbl_at_vars()`: ! Can't subset columns that don't exist. x Column `test` doesn't exist. Code tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) tbl <- group_by(tbl, gr1) (expect_error(summarise_at(tbl, vars(gr1), mean))) Output Error in `tbl_at_vars()`: ! Can't subset columns that don't exist. x Column `gr1` doesn't exist. Code (expect_error(mutate_all(mtcars, length, 0, 0))) Output Error in `mutate()`: i In argument: `mpg = .Primitive("length")(mpg, 0, 0)`. Caused by error: ! 3 arguments passed to 'length' which requires 1 Code (expect_error(mutate_all(mtcars, mean, na.rm = TRUE, na.rm = TRUE))) Output Error in `mutate()`: i In argument: `mpg = (function (x, ...) ...`. Caused by error in `mean.default()`: ! formal argument "na.rm" matched by multiple actual arguments dplyr/tests/testthat/_snaps/count-tally.md0000644000176200001440000000475614416000515020476 0ustar liggesusers# name must be string Code count(df, x, name = 1) Condition Error in `tally()`: ! `name` must be a single string, not the number 1. 
--- Code count(df, x, name = letters) Condition Error in `tally()`: ! `name` must be a single string, not a character vector. # can only explicitly chain together multiple tallies Code df <- data.frame(g = c(1, 1, 2, 2), n = 1:4) df %>% count(g, wt = n) Output g n 1 1 3 2 2 7 Code df %>% count(g, wt = n) %>% count(wt = n) Output n 1 10 Code df %>% count(n) Message Storing counts in `nn`, as `n` already present in input i Use `name = "new_name"` to pick a new name. Output n nn 1 1 1 2 2 1 3 3 1 4 4 1 # count() owns errors (#6139) Code (expect_error(count(mtcars, new = 1 + ""))) Output Error in `count()`: i In argument: `new = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code (expect_error(count(mtcars, wt = 1 + ""))) Output Error in `count()`: i In argument: `n = sum(1 + "", na.rm = TRUE)`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator # tally() owns errors (#6139) Code (expect_error(tally(mtcars, wt = 1 + ""))) Output Error in `tally()`: i In argument: `n = sum(1 + "", na.rm = TRUE)`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator # add_count() owns errors (#6139) Code (expect_error(add_count(mtcars, new = 1 + ""))) Output Error in `add_count()`: i In argument: `new = 1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code (expect_error(add_count(mtcars, wt = 1 + ""))) Output Error in `add_count()`: i In argument: `n = sum(1 + "", na.rm = TRUE)`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator # add_tally() owns errors (#6139) Code (expect_error(add_tally(mtcars, wt = 1 + ""))) Output Error in `add_tally()`: i In argument: `n = sum(1 + "", na.rm = TRUE)`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator dplyr/tests/testthat/_snaps/case-match.md0000644000176200001440000000243114416000507020215 0ustar liggesusers# requires at least one condition Code case_match(1) Condition Error in `case_match()`: ! 
At least one condition must be supplied. --- Code case_match(1, NULL) Condition Error in `case_match()`: ! At least one condition must be supplied. # `.default` is part of common type computation Code case_match(1, 1 ~ 1L, .default = "x") Condition Error in `case_match()`: ! Can't combine `..1 (right)` and `.default` . # `NULL` formula element throws meaningful error Code case_match(1, 1 ~ NULL) Condition Error in `case_match()`: ! `..1 (right)` must be a vector, not `NULL`. --- Code case_match(1, NULL ~ 1) Condition Error in `case_match()`: ! `..1 (left)` must be a vector, not `NULL`. # throws chained errors when formula evaluation fails Code case_match(1, 1 ~ 2, 3 ~ stop("oh no!")) Condition Error in `case_match()`: ! Failed to evaluate the right-hand side of formula 2. Caused by error: ! oh no! --- Code case_match(1, 1 ~ 2, stop("oh no!") ~ 4) Condition Error in `case_match()`: ! Failed to evaluate the left-hand side of formula 2. Caused by error: ! oh no! dplyr/tests/testthat/_snaps/locale.md0000644000176200001440000000031314416000530017440 0ustar liggesusers# `dplyr_legacy_locale()` respects `dplyr.legacy_locale` Code dplyr_legacy_locale() Condition Error: ! Global option `dplyr.legacy_locale` must be a single `TRUE` or `FALSE`. dplyr/tests/testthat/_snaps/transmute.md0000644000176200001440000000113514416000544020233 0ustar liggesusers# transmute() error messages Code (expect_error(transmute(mtcars, cyl2 = cyl, .keep = "all"))) Output Error in `transmute()`: ! The `.keep` argument is not supported. Code (expect_error(transmute(mtcars, cyl2 = cyl, .before = disp))) Output Error in `transmute()`: ! The `.before` argument is not supported. Code (expect_error(transmute(mtcars, cyl2 = cyl, .after = disp))) Output Error in `transmute()`: ! The `.after` argument is not supported. dplyr/tests/testthat/_snaps/na-if.md0000644000176200001440000000162014416000533017200 0ustar liggesusers# is type stable on `x` Code na_if(0L, 1.5) Condition Error in `na_if()`: ! 
Can't convert from `y` to `x` due to loss of precision. * Locations: 1 # is size stable on `x` Code na_if(1, integer()) Condition Error in `na_if()`: ! Can't recycle `y` (size 0) to size 1. --- Code na_if(1, c(1, 2)) Condition Error in `na_if()`: ! Can't recycle `y` (size 2) to size 1. --- Code na_if(c(1, 2, 3), c(1, 2)) Condition Error in `na_if()`: ! Can't recycle `y` (size 2) to size 3. # requires vector types for `x` and `y` Code na_if(environment(), 1) Condition Error in `na_if()`: ! `x` must be a vector, not an environment. --- Code na_if(1, environment()) Condition Error in `na_if()`: ! `y` must be a vector, not an environment. dplyr/tests/testthat/_snaps/rowwise.md0000644000176200001440000000477414416000537017726 0ustar liggesusers# rowwise has decent print method Code rf Output # A tibble: 5 x 1 # Rowwise: x x 1 1 2 2 3 3 4 4 5 5 # validate_rowwise_df() gives useful errors Code (expect_error(validate_rowwise_df(df1))) Output Error in `validate_rowwise_df()`: ! The `.rows` column must be a list of size 1, one-based integer vectors with the right value. Code (expect_error(validate_rowwise_df(df2))) Output Error in `validate_rowwise_df()`: ! The last column of the `groups` attribute must be called `.rows`. Code (expect_error(validate_rowwise_df(df3))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_rowwise_df(df4))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_rowwise_df(df7))) Output Error in `validate_rowwise_df()`: ! The `.rows` column must be a list of size 1, one-based integer vectors with the right value. Code (expect_error(attr(df8, "groups")$.rows <- 1:8)) Output Error in `$<-`: ! Assigned data `1:8` must be compatible with existing data. x Existing data has 10 rows. x Assigned data has 8 rows. i Only vectors of size 1 are recycled. Caused by error in `vectbl_recycle_rhs_rows()`: ! 
Can't recycle input of size 8 to size 10. Code (expect_error(validate_rowwise_df(df10))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_rowwise_df(df11))) Output Error in `validate_rowwise_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(new_rowwise_df(tibble(x = 1:10), tibble(".rows" := list(1:5, -1L)))) ) Output Error in `new_rowwise_df()`: ! `group_data` must be a tibble without a `.rows` column. Code (expect_error(new_rowwise_df(tibble(x = 1:10), 1:10))) Output Error in `new_rowwise_df()`: ! `group_data` must be a tibble without a `.rows` column. dplyr/tests/testthat/_snaps/join-rows.md0000644000176200001440000002757614525503021020160 0ustar liggesusers# `relationship` default behavior is correct Code out <- join_rows(c(1, 1), c(1, 1), condition = "==") Condition Warning: Detected an unexpected many-to-many relationship between `x` and `y`. i Row 1 of `x` matches multiple rows in `y`. i Row 1 of `y` matches multiple rows in `x`. i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning. # join_rows() allows `unmatched` to be specified independently for inner joins Code join_rows(c(1, 3), c(1, 2), type = "inner", unmatched = c("drop", "error")) Condition Error: ! Each row of `y` must be matched by `x`. i Row 2 of `y` was not matched. # join_rows() expects incompatible type errors to have been handled by join_cast_common() Code (expect_error(join_rows(data.frame(x = 1), data.frame(x = factor("a"))))) Output Error: ! `join_cast_common()` should have handled this. i This is an internal error that was detected in the dplyr package. Please report it at with a reprex () and the full backtrace. # join_rows() gives meaningful one-to-one errors Code join_rows(1, c(1, 1), relationship = "one-to-one") Condition Error: ! Each row in `x` must match at most 1 row in `y`. i Row 1 of `x` matches multiple rows in `y`. 
--- Code join_rows(c(1, 1), 1, relationship = "one-to-one") Condition Error: ! Each row in `y` must match at most 1 row in `x`. i Row 1 of `y` matches multiple rows in `x`. # join_rows() gives meaningful one-to-many errors Code join_rows(c(1, 1), 1, relationship = "one-to-many") Condition Error: ! Each row in `y` must match at most 1 row in `x`. i Row 1 of `y` matches multiple rows in `x`. # join_rows() gives meaningful many-to-one errors Code join_rows(1, c(1, 1), relationship = "many-to-one") Condition Error: ! Each row in `x` must match at most 1 row in `y`. i Row 1 of `x` matches multiple rows in `y`. # join_rows() gives meaningful many-to-many warnings Code join_rows(c(1, 1), c(1, 1)) Condition Warning: Detected an unexpected many-to-many relationship between `x` and `y`. i Row 1 of `x` matches multiple rows in `y`. i Row 1 of `y` matches multiple rows in `x`. i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning. Output $x [1] 1 1 2 2 $y [1] 1 2 1 2 --- Code left_join(df, df, by = join_by(x)) Condition Warning in `left_join()`: Detected an unexpected many-to-many relationship between `x` and `y`. i Row 1 of `x` matches multiple rows in `y`. i Row 1 of `y` matches multiple rows in `x`. i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning. Output x 1 1 2 1 3 1 4 1 # join_rows() gives meaningful error message on unmatched rows Code join_rows(data.frame(x = c(1, 2)), data.frame(x = c(3, 1)), type = "left", unmatched = "error") Condition Error: ! Each row of `y` must be matched by `x`. i Row 1 of `y` was not matched. --- Code join_rows(data.frame(x = c(1, 2)), data.frame(x = c(3, 1)), type = "nest", unmatched = "error") Condition Error: ! Each row of `y` must be matched by `x`. i Row 1 of `y` was not matched. --- Code join_rows(data.frame(x = c(1, 2)), data.frame(x = c(3, 1)), type = "right", unmatched = "error") Condition Error: ! 
Each row of `x` must have a match in `y`. i Row 2 of `x` does not have a match. --- Code join_rows(data.frame(x = c(1, 2)), data.frame(x = 1), type = "inner", unmatched = "error") Condition Error: ! Each row of `x` must have a match in `y`. i Row 2 of `x` does not have a match. --- Code join_rows(data.frame(x = c(1, 2)), data.frame(x = 1), type = "inner", unmatched = c("error", "drop")) Condition Error: ! Each row of `x` must have a match in `y`. i Row 2 of `x` does not have a match. --- Code join_rows(data.frame(x = 1), data.frame(x = c(1, 2)), type = "inner", unmatched = "error") Condition Error: ! Each row of `y` must be matched by `x`. i Row 2 of `y` was not matched. --- Code join_rows(data.frame(x = 1), data.frame(x = c(1, 2)), type = "inner", unmatched = c("drop", "error")) Condition Error: ! Each row of `y` must be matched by `x`. i Row 2 of `y` was not matched. # join_rows() always errors on unmatched missing values Code join_rows(data.frame(x = 1), data.frame(x = NA), type = "left", unmatched = "error", na_matches = "na") Condition Error: ! Each row of `y` must be matched by `x`. i Row 1 of `y` was not matched. --- Code join_rows(data.frame(x = NA), data.frame(x = NA), type = "left", unmatched = "error", na_matches = "never") Condition Error: ! Each row of `y` must be matched by `x`. i Row 1 of `y` was not matched. --- Code join_rows(data.frame(x = 1), data.frame(x = NA), type = "nest", unmatched = "error", na_matches = "na") Condition Error: ! Each row of `y` must be matched by `x`. i Row 1 of `y` was not matched. --- Code join_rows(data.frame(x = NA), data.frame(x = NA), type = "nest", unmatched = "error", na_matches = "never") Condition Error: ! Each row of `y` must be matched by `x`. i Row 1 of `y` was not matched. --- Code join_rows(data.frame(x = NA), data.frame(x = 1), type = "right", unmatched = "error", na_matches = "na") Condition Error: ! Each row of `x` must have a match in `y`. i Row 1 of `x` does not have a match. 
--- Code join_rows(data.frame(x = NA), data.frame(x = NA), type = "right", unmatched = "error", na_matches = "never") Condition Error: ! Each row of `x` must have a match in `y`. i Row 1 of `x` does not have a match. --- Code join_rows(data.frame(x = 1), data.frame(x = c(1, NA)), type = "inner", unmatched = "error", na_matches = "na") Condition Error: ! Each row of `y` must be matched by `x`. i Row 2 of `y` was not matched. --- Code join_rows(data.frame(x = 1), data.frame(x = c(1, NA)), type = "inner", unmatched = c("drop", "error"), na_matches = "na") Condition Error: ! Each row of `y` must be matched by `x`. i Row 2 of `y` was not matched. --- Code join_rows(data.frame(x = c(1, NA)), data.frame(x = 1), type = "inner", unmatched = "error", na_matches = "na") Condition Error: ! Each row of `x` must have a match in `y`. i Row 2 of `x` does not have a match. --- Code join_rows(data.frame(x = c(1, NA)), data.frame(x = 1), type = "inner", unmatched = c("error", "drop"), na_matches = "na") Condition Error: ! Each row of `x` must have a match in `y`. i Row 2 of `x` does not have a match. --- Code join_rows(data.frame(x = NA), data.frame(x = NA), type = "inner", unmatched = "error", na_matches = "never") Condition Error: ! Each row of `x` must have a match in `y`. i Row 1 of `x` does not have a match. # join_rows() validates `unmatched` Code join_rows(df, df, unmatched = 1) Condition Error: ! `unmatched` must be a character vector, not the number 1. Code join_rows(df, df, unmatched = "foo") Condition Error: ! `unmatched` must be one of "drop" or "error", not "foo". Code join_rows(df, df, type = "left", unmatched = character()) Condition Error: ! `unmatched` must be length 1, not 0. Code join_rows(df, df, type = "left", unmatched = c("drop", "error")) Condition Error: ! `unmatched` must be length 1, not 2. Code join_rows(df, df, type = "inner", unmatched = character()) Condition Error: ! `unmatched` must be length 1 or 2, not 0. 
Code join_rows(df, df, type = "inner", unmatched = c("drop", "error", "error")) Condition Error: ! `unmatched` must be length 1 or 2, not 3. Code join_rows(df, df, type = "inner", unmatched = c("drop", "dr")) Condition Error: ! `unmatched` must be one of "drop" or "error", not "dr". i Did you mean "drop"? # join_rows() validates `relationship` Code join_rows(df, df, relationship = 1) Condition Error: ! `relationship` must be a string or character vector. --- Code join_rows(df, df, relationship = "none") Condition Error: ! `relationship` must be one of "one-to-one", "one-to-many", "many-to-one", or "many-to-many", not "none". --- Code join_rows(df, df, relationship = "warn-many-to-many") Condition Error: ! `relationship` must be one of "one-to-one", "one-to-many", "many-to-one", or "many-to-many", not "warn-many-to-many". i Did you mean "many-to-many"? # join_rows() rethrows overflow error nicely (#6912) Code join_rows(df, df, condition = ">=") Condition Error: ! This join would result in more rows than dplyr can handle. i 50000005000000 rows would be returned. 2147483647 rows is the maximum number allowed. i Double check your join keys. This error commonly occurs due to a missing join key, or an improperly specified join condition. # `multiple = NULL` is deprecated and results in `'all'` (#6731) Code out <- join_rows(df1, df2, multiple = NULL) Condition Warning: Specifying `multiple = NULL` was deprecated in dplyr 1.1.1. i Please use `multiple = "all"` instead. --- Code left_join(df1, df2, by = join_by(x), multiple = NULL) Condition Warning: Specifying `multiple = NULL` was deprecated in dplyr 1.1.1. i Please use `multiple = "all"` instead. Output # A tibble: 3 x 1 x 1 1 2 2 3 2 # `multiple = 'error'` is deprecated (#6731) Code join_rows(df1, df2, multiple = "error") Condition Warning: Specifying `multiple = "error"` was deprecated in dplyr 1.1.1. i Please use `relationship = "many-to-one"` instead. Error: ! Each row in `x` must match at most 1 row in `y`. 
i Row 2 of `x` matches multiple rows in `y`. --- Code left_join(df1, df2, by = join_by(x), multiple = "error") Condition Warning: Specifying `multiple = "error"` was deprecated in dplyr 1.1.1. i Please use `relationship = "many-to-one"` instead. Error in `left_join()`: ! Each row in `x` must match at most 1 row in `y`. i Row 2 of `x` matches multiple rows in `y`. # `multiple = 'warning'` is deprecated (#6731) Code out <- join_rows(df1, df2, multiple = "warning") Condition Warning: Specifying `multiple = "warning"` was deprecated in dplyr 1.1.1. i Please use `relationship = "many-to-one"` instead. Warning: Each row in `x` is expected to match at most 1 row in `y`. i Row 2 of `x` matches multiple rows. --- Code left_join(df1, df2, by = join_by(x), multiple = "warning") Condition Warning: Specifying `multiple = "warning"` was deprecated in dplyr 1.1.1. i Please use `relationship = "many-to-one"` instead. Warning in `left_join()`: Each row in `x` is expected to match at most 1 row in `y`. i Row 2 of `x` matches multiple rows. Output # A tibble: 3 x 1 x 1 1 2 2 3 2 dplyr/tests/testthat/_snaps/rows.md0000644000176200001440000002413714416000537017214 0ustar liggesusers# rows_insert() doesn't allow insertion of matched keys by default Code (expect_error(rows_insert(x, y, by = "a"))) Output Error in `rows_insert()`: ! `y` can't contain keys that already exist in `x`. i The following rows in `y` have keys that already exist in `x`: `c(1)`. i Use `conflict = "ignore"` if you want to ignore these `y` rows. --- Code (expect_error(rows_insert(x, y, by = "a"))) Output Error in `rows_insert()`: ! `y` can't contain keys that already exist in `x`. i The following rows in `y` have keys that already exist in `x`: `c(1, 2, 3)`. i Use `conflict = "ignore"` if you want to ignore these `y` rows. # rows_insert() casts keys to the type of `x` Code (expect_error(rows_insert(x, y, "key"))) Output Error in `rows_insert()`: ! Can't convert from `y$key` to `x$key` due to loss of precision. 
* Locations: 1 # rows_insert() casts values to the type of `x` Code (expect_error(rows_insert(x, y, "key"))) Output Error in `rows_insert()`: ! Can't convert from `y$value` to `x$value` due to loss of precision. * Locations: 1 # rows_insert() checks that `x` and `y` contain `by` (#6652) Code (expect_error(rows_insert(x, y, by = "c"))) Output Error in `rows_insert()`: ! All columns specified through `by` must exist in `x` and `y`. i The following columns are missing from `x`: `c`. --- Code (expect_error(rows_insert(x, y, by = c("a", "b")))) Output Error in `rows_insert()`: ! All columns specified through `by` must exist in `x` and `y`. i The following columns are missing from `y`: `b`. # `conflict` is validated Code (expect_error(rows_insert(x, y, by = "a", conflict = "foo"))) Output Error in `rows_insert()`: ! `conflict` must be one of "error" or "ignore", not "foo". Code (expect_error(rows_insert(x, y, by = "a", conflict = 1))) Output Error in `rows_insert()`: ! `conflict` must be a string or character vector. # rows_append() casts to the type of `x` Code (expect_error(rows_append(x, y))) Output Error in `rows_append()`: ! Can't convert from `y$key` to `x$key` due to loss of precision. * Locations: 1 # rows_append() requires that `y` columns be a subset of `x` Code (expect_error(rows_append(x, y))) Output Error in `rows_append()`: ! All columns in `y` must exist in `x`. i The following columns only exist in `y`: `c`. # rows_update() requires `y` keys to exist in `x` by default Code (expect_error(rows_update(x, y, "a"))) Output Error in `rows_update()`: ! `y` must contain keys that already exist in `x`. i The following rows in `y` have keys that don't exist in `x`: `c(1, 3)`. i Use `unmatched = "ignore"` if you want to ignore these `y` rows. # rows_update() doesn't allow `y` keys to be duplicated (#5553) Code (expect_error(rows_update(x, y, by = "a"))) Output Error in `rows_update()`: ! `y` key values must be unique. 
i The following rows contain duplicate key values: `c(1, 2)`. # rows_update() casts keys to their common type for matching but retains `x` type Code (expect_error(rows_update(x, y, "key"))) Output Error in `rows_update()`: ! Can't combine `x$key` and `y$key` . # rows_update() casts values to the type of `x` Code (expect_error(rows_update(x, y, "key"))) Output Error in `rows_update()`: ! Can't convert from `y$value` to `x$value` due to loss of precision. * Locations: 1 # `unmatched` is validated Code (expect_error(rows_update(x, y, by = "a", unmatched = "foo"))) Output Error in `rows_update()`: ! `unmatched` must be one of "error" or "ignore", not "foo". Code (expect_error(rows_update(x, y, by = "a", unmatched = 1))) Output Error in `rows_update()`: ! `unmatched` must be a string or character vector. # rows_patch() requires `y` keys to exist in `x` by default Code (expect_error(rows_patch(x, y, "a"))) Output Error in `rows_patch()`: ! `y` must contain keys that already exist in `x`. i The following rows in `y` have keys that don't exist in `x`: `c(1, 3)`. i Use `unmatched = "ignore"` if you want to ignore these `y` rows. # rows_patch() doesn't allow `y` keys to be duplicated (#5553) Code (expect_error(rows_patch(x, y, by = "a"))) Output Error in `rows_patch()`: ! `y` key values must be unique. i The following rows contain duplicate key values: `c(1, 2)`. # rows_patch() casts keys to their common type for matching but retains `x` type Code (expect_error(rows_patch(x, y, "key"))) Output Error in `rows_patch()`: ! Can't combine `x$key` and `y$key` . # rows_patch() casts values to the type of `x` Code (expect_error(rows_patch(x, y, "key"))) Output Error in `rows_patch()`: ! Can't convert from `y$value` to `x$value` due to loss of precision. * Locations: 1 # rows_upsert() doesn't allow `y` keys to be duplicated (#5553) Code (expect_error(rows_upsert(x, y, by = "a"))) Output Error in `rows_upsert()`: ! `y` key values must be unique. 
i The following rows contain duplicate key values: `c(1, 2)`. # rows_upsert() casts keys to their common type for matching but retains `x` type Code (expect_error(rows_upsert(x, y, "key"))) Output Error in `rows_upsert()`: ! Can't combine `x$key` and `y$key` . # rows_upsert() casts keys to the type of `x` Code (expect_error(rows_upsert(x, y, "key"))) Output Error in `rows_upsert()`: ! Can't convert from `y$key` to `x$key` due to loss of precision. * Locations: 1 # rows_upsert() casts values to the type of `x` Code (expect_error(rows_upsert(x, y, "key"))) Output Error in `rows_upsert()`: ! Can't convert from `y$value` to `x$value` due to loss of precision. * Locations: 1 # rows_delete() ignores extra `y` columns, with a message Code out <- rows_delete(x, y) Message Matching, by = "a" Ignoring extra `y` columns: b --- Code out <- rows_delete(x, y, by = "a") Message Ignoring extra `y` columns: b # rows_delete() requires `y` keys to exist in `x` by default Code (expect_error(rows_delete(x, y, "a"))) Output Error in `rows_delete()`: ! `y` must contain keys that already exist in `x`. i The following rows in `y` have keys that don't exist in `x`: `c(1, 3)`. i Use `unmatched = "ignore"` if you want to ignore these `y` rows. # rows_delete() casts keys to their common type for matching but retains `x` type Code (expect_error(rows_delete(x, y, "key"))) Output Error in `rows_delete()`: ! Can't combine `x$key` and `y$key` . # rows_check_x_contains_y() checks that `y` columns are in `x` Code (expect_error(rows_check_x_contains_y(x, y))) Output Error: ! All columns in `y` must exist in `x`. i The following columns only exist in `y`: `b`. # rows_check_by() checks that `y` has at least 1 column before using it (#6061) Code (expect_error(rows_check_by(by = NULL, y = y))) Output Error: ! `y` must have at least one column. 
# rows_check_by() uses the first column from `y` by default, with a message Code by <- rows_check_by(by = NULL, y = y) Message Matching, by = "a" # rows_check_by() validates `by` Code (expect_error(rows_check_by(by = 1, y = y))) Output Error: ! `by` must be a character vector. Code (expect_error(rows_check_by(by = character(), y = y))) Output Error: ! `by` must specify at least 1 column. Code (expect_error(rows_check_by(by = c(x = "y"), y = y))) Output Error: ! `by` must be unnamed. # rows_check_contains_by() checks that all `by` columns are in `x` Code (expect_error(rows_check_contains_by(x, "y", arg = "x"))) Output Error: ! All columns specified through `by` must exist in `x` and `y`. i The following columns are missing from `x`: `y`. Code (expect_error(rows_check_contains_by(x, c("y", "x", "z"), arg = "y"))) Output Error: ! All columns specified through `by` must exist in `x` and `y`. i The following columns are missing from `y`: `y` and `z`. # rows_check_unique() requires uniqueness Code (expect_error(rows_check_unique(x["x"], "x"))) Output Error: ! `x` key values must be unique. i The following rows contain duplicate key values: `c(1, 2, 3)`. Code (expect_error(rows_check_unique(x[c("x", "y")], "y"))) Output Error: ! `y` key values must be unique. i The following rows contain duplicate key values: `c(1, 3)`. dplyr/tests/testthat/_snaps/rename.md0000644000176200001440000000103514416000535017457 0ustar liggesusers# `.fn` result type is checked (#6561) Code rename_with(df, fn) Condition Error in `rename_with()`: ! `.fn` must return a character vector, not an integer. # `.fn` result size is checked (#6561) Code rename_with(df, fn) Condition Error in `rename_with()`: ! `.fn` must return a vector of length 2, not 3. # can't rename in `.cols` Code rename_with(df, toupper, .cols = c(y = x)) Condition Error in `rename_with()`: ! Can't rename variables in this context. 
dplyr/tests/testthat/_snaps/join-cross.md0000644000176200001440000000030214416000525020271 0ustar liggesusers# cross join checks for duplicate names Code cross_join(df1, df2) Condition Error in `cross_join()`: ! Input columns in `x` must be unique. x Problem with `a`. dplyr/tests/testthat/_snaps/colwise-filter.md0000644000176200001440000000073514416000510021137 0ustar liggesusers# colwise filter() give meaningful errors Code (expect_error(filter_if(mtcars, is_character, all_vars(. > 0)))) Output Error in `filter_if()`: ! `.predicate` must match at least one column. Code (expect_error(filter_all(mtcars, list(~ . > 0)))) Output Error in `filter_all()`: ! `.vars_predicate` must be a function or a call to `all_vars()` or `any_vars()`, not a list. dplyr/tests/testthat/_snaps/n-distinct.md0000644000176200001440000000073514416000532020267 0ustar liggesusers# n_distinct() generates useful errors Code n_distinct() Condition Error in `n_distinct()`: ! `...` is absent, but must be supplied. Code n_distinct(x = 1:4) Condition Error in `n_distinct()`: ! Arguments in `...` must be passed by position, not name. x Problematic argument: * x = 1:4 Code n_distinct(mean) Condition Error in `n_distinct()`: ! `..1` must be a vector, not a function. dplyr/tests/testthat/_snaps/deprec-src-local.md0000644000176200001440000000131114416000517021324 0ustar liggesusers# src_df() is deprecated / errors Code (expect_error(src_df("base", new.env()))) Output Error in `src_local()`: ! Exactly one of `pkg` and `env` must be non-NULL, not 2. Code (expect_error(src_df())) Output Error in `src_local()`: ! Exactly one of `pkg` and `env` must be non-NULL, not 0. Code env <- new.env(parent = emptyenv()) env$x <- 1 src_env <- src_df(env = env) (expect_error(copy_to(src_env, tibble(x = 1), name = "x"))) Output Error in `copy_to()`: ! Object with `name` = `x` must not already exist, unless `overwrite` = TRUE. 
dplyr/tests/testthat/_snaps/vec-case-match.md0000644000176200001440000001163314416000544020775 0ustar liggesusers# `haystacks` must be castable to `needles` Code vec_case_match(1L, haystacks = list(1.5), values = list(2)) Condition Error in `vec_case_match()`: ! Can't convert from `haystacks[[1]]` to due to loss of precision. * Locations: 1 # `ptype` overrides `values` common type Code vec_case_match(1:2, haystacks = list(1), values = list(1.5), ptype = integer()) Condition Error in `vec_case_match()`: ! Can't convert from `values[[1]]` to due to loss of precision. * Locations: 1 # `default` respects `ptype` Code vec_case_match(needles = 1, haystacks = list(1), values = list(2L), default = 1.5, ptype = integer()) Condition Error in `vec_case_match()`: ! Can't convert from `default` to due to loss of precision. * Locations: 1 # `NULL` values in `haystacks` and `values` are not dropped Code vec_case_match(1:2, list(1, NULL, 2), list("a", NULL, "b")) Condition Error in `vec_case_match()`: ! `haystacks[[2]]` must be a vector, not `NULL`. --- Code vec_case_match(1:2, list(1, NULL, 2), list("a", "a", "b")) Condition Error in `vec_case_match()`: ! `haystacks[[2]]` must be a vector, not `NULL`. --- Code vec_case_match(1:2, list(1, 1, 2), list("a", NULL, "b")) Condition Error in `vec_case_match()`: ! `values[[2]]` must be a vector, not `NULL`. # size of `needles` is maintained Code vec_case_match(1, haystacks = list(1), values = list(1:2)) Condition Error in `vec_case_match()`: ! `values[[1]]` must have size 1, not size 2. # requires at least one condition Code vec_case_match(1, haystacks = list(), values = list()) Condition Error in `vec_case_match()`: ! At least one condition must be supplied. # input must be a vector Code vec_case_match(environment(), haystacks = list(environment()), values = list(1)) Condition Error in `vec_case_match()`: ! `needles` must be a vector, not an environment. 
# `haystacks` must be a list Code vec_case_match(1, haystacks = 1, values = list(2)) Condition Error in `vec_case_match()`: ! `haystacks` must be a list, not the number 1. # `values` must be a list Code vec_case_match(1, haystacks = list(1), values = 2) Condition Error in `vec_case_match()`: ! `values` must be a list, not the number 2. # `needles_arg` is respected Code vec_case_match(needles = environment(), haystacks = list(environment()), values = list(1), needles_arg = "foo") Condition Error in `vec_case_match()`: ! `foo` must be a vector, not an environment. --- Code vec_case_match(needles = environment(), haystacks = list(environment()), values = list(1), needles_arg = "") Condition Error in `vec_case_match()`: ! Input must be a vector, not an environment. # `haystacks_arg` is respected Code vec_case_match(needles = 1, haystacks = 1, values = list(1), haystacks_arg = "foo") Condition Error in `vec_case_match()`: ! `foo` must be a list, not the number 1. --- Code vec_case_match(needles = 1, haystacks = 1, values = list(1), haystacks_arg = "") Condition Error in `vec_case_match()`: ! Input must be a list, not the number 1. --- Code vec_case_match(needles = 1, haystacks = list(a = "x"), values = list(1), haystacks_arg = "foo") Condition Error in `vec_case_match()`: ! Can't convert `foo$a` to . --- Code vec_case_match(needles = 1, haystacks = list("x"), values = list(1), haystacks_arg = "") Condition Error in `vec_case_match()`: ! Can't convert `..1` to . # `values_arg` is respected Code vec_case_match(needles = 1, haystacks = list(1, 2), values = list("x", b = 1), values_arg = "foo") Condition Error in `vec_case_match()`: ! Can't combine `foo[[1]]` and `foo$b` . --- Code vec_case_match(needles = 1, haystacks = list(1, 2), values = list("x", b = 1), values_arg = "") Condition Error in `vec_case_match()`: ! Can't combine `..1` and `b` . 
# `default_arg` is respected Code vec_case_match(needles = 1, haystacks = list(1), values = list(2L), default = 1.5, default_arg = "foo", ptype = integer()) Condition Error in `vec_case_match()`: ! Can't convert from `foo` to due to loss of precision. * Locations: 1 --- Code vec_case_match(needles = 1, haystacks = list(1), values = list(2L), default = 1.5, default_arg = "", ptype = integer()) Condition Error in `vec_case_match()`: ! Can't convert from to due to loss of precision. * Locations: 1 dplyr/tests/testthat/_snaps/lead-lag.md0000644000176200001440000000421414416000530017653 0ustar liggesusers# `lag()` gives informative error for objects Code lag(ts(1:10)) Condition Error in `lag()`: ! `x` must be a vector, not a , do you want `stats::lag()`? # `lead()` / `lag()` validate `n` Code lead(1:5, n = 1:2) Condition Error in `lead()`: ! `n` must be a whole number, not an integer vector. Code lead(1:5, -1) Condition Error in `lead()`: ! `n` must be positive. --- Code lag(1:5, n = 1:2) Condition Error in `lag()`: ! `n` must be a whole number, not an integer vector. Code lag(1:5, -1) Condition Error in `lag()`: ! `n` must be positive. # `lead()` / `lag()` check for empty dots Code lead(1:5, deault = 1) Condition Error in `lead()`: ! `...` must be empty. x Problematic argument: * deault = 1 --- Code lag(1:5, deault = 1) Condition Error in `lag()`: ! `...` must be empty. x Problematic argument: * deault = 1 # `lead()` / `lag()` require that `x` is a vector Code lead(environment()) Condition Error in `lead()`: ! `x` must be a vector, not an environment. --- Code lag(environment()) Condition Error in `lag()`: ! `x` must be a vector, not an environment. # `default` is cast to the type of `x` (#6330) Code shift(1L, default = 1.5) Condition Error: ! Can't convert from `default` to `x` due to loss of precision. * Locations: 1 # `default` must be size 1 (#5641) Code shift(1:5, default = 1:2) Condition Error: ! `default` must have size 1, not size 2. 
--- Code shift(1:5, default = integer()) Condition Error: ! `default` must have size 1, not size 0. # `n` is validated Code shift(1, n = 1:2) Condition Error in `shift()`: ! `n` must be a whole number, not an integer vector. # `order_by` must be the same size as `x` Code shift(1:5, order_by = 1:4) Condition Error in `with_order()`: ! `order_by` must have size 5, not size 4. dplyr/tests/testthat/_snaps/coalesce.md0000644000176200001440000000253114416000510017761 0ustar liggesusers# coalesce() gives meaningful error messages Code (expect_error(coalesce(1:2, 1:3))) Output Error in `coalesce()`: ! Can't recycle `..1` (size 2) to match `..2` (size 3). Code (expect_error(coalesce(1:2, letters[1:2]))) Output Error in `coalesce()`: ! Can't combine `..1` and `..2` . # `.size` overrides the common size Code coalesce(x, 1:2, .size = vec_size(x)) Condition Error in `coalesce()`: ! Can't recycle `..2` (size 2) to size 1. # must have at least one non-`NULL` vector Code coalesce() Condition Error in `coalesce()`: ! `...` can't be empty. --- Code coalesce(NULL, NULL) Condition Error in `coalesce()`: ! `...` can't be empty. # inputs must be vectors Code coalesce(1, environment()) Condition Error in `coalesce()`: ! `..2` must be a vector, not an environment. # names in error messages are indexed correctly Code coalesce(1, "x") Condition Error in `coalesce()`: ! Can't combine `..1` and `..2` . --- Code coalesce(1, y = "x") Condition Error in `coalesce()`: ! Can't combine `..1` and `y` . dplyr/tests/testthat/_snaps/slice.md0000644000176200001440000002627314525503021017321 0ustar liggesusers# slice errors if positive and negative indices mixed Code slice(tibble(), 1, -1) Condition Error in `slice()`: ! Can't compute indices. Caused by error: ! Can't subset elements with `1`. x Negative and positive locations can't be mixed. i Subscript `1` has a positive value at location 1. 
# slicing with one-column matrix is deprecated Code out <- slice(df, matrix(c(1, 3))) Condition Warning: Slicing with a 1-column matrix was deprecated in dplyr 1.1.0. # slice errors if index is not numeric Code slice(tibble(), "a") Condition Error in `slice()`: i In argument: `"a"`. Caused by error: ! Can't subset elements with `"a"`. x `"a"` must be numeric, not the string "a". # user errors are correctly labelled Code slice(df, 1 + "") Condition Error in `slice()`: i In argument: `1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator Code slice(group_by(df, x), 1 + "") Condition Error in `slice()`: i In argument: `1 + ""`. i In group 1: `x = 1`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator # `...` can't be named (#6554) Code slice(df, 1, foo = g) Condition Error in `slice()`: ! Arguments in `...` must be passed by position, not name. x Problematic argument: * foo = g # can't use `.by` with `.preserve` Code slice(df, .by = x, .preserve = TRUE) Condition Error in `slice()`: ! Can't supply both `.by` and `.preserve`. # catches `.by` with grouped-df Code slice(gdf, .by = x) Condition Error in `slice()`: ! Can't supply `.by` when `.data` is a grouped data frame. # catches `.by` with rowwise-df Code slice(rdf, .by = x) Condition Error in `slice()`: ! Can't supply `.by` when `.data` is a rowwise data frame. # catches `by` typo (#6647) Code slice(df, by = x) Condition Error in `slice()`: ! Can't specify an argument named `by` in this verb. i Did you mean to use `.by` instead? # slice_helpers() call get_slice_size() Code slice_head(df, n = "a") Condition Error in `slice_head()`: ! `n` must be a round number, not the string "a". Code slice_tail(df, n = "a") Condition Error in `slice_tail()`: ! `n` must be a round number, not the string "a". Code slice_min(df, x, n = "a") Condition Error in `slice_min()`: ! `n` must be a round number, not the string "a". Code slice_max(df, x, n = "a") Condition Error in `slice_max()`: ! 
`n` must be a round number, not the string "a". Code slice_sample(df, n = "a") Condition Error in `slice_sample()`: ! `n` must be a round number, not the string "a". # get_slice_size() validates its inputs Code get_slice_size(n = 1, prop = 1) Condition Error: ! Must supply `n` or `prop`, but not both. Code get_slice_size(n = "a") Condition Error: ! `n` must be a round number, not the string "a". Code get_slice_size(prop = "a") Condition Error: ! `prop` must be a number, not the string "a". # get_slice_size() snapshots Code body(get_slice_size(prop = 0)) Output clamp(0, floor(0 * n), n) Code body(get_slice_size(prop = 0.4)) Output clamp(0, floor(0.4 * n), n) Code body(get_slice_size(prop = 2)) Output clamp(0, floor(2 * n), n) Code body(get_slice_size(prop = 2, allow_outsize = TRUE)) Output floor(2 * n) Code body(get_slice_size(prop = -0.4)) Output clamp(0, ceiling(n + -0.4 * n), n) Code body(get_slice_size(prop = -2)) Output clamp(0, ceiling(n + -2 * n), n) Code body(get_slice_size(n = 0)) Output clamp(0, 0, n) Code body(get_slice_size(n = 4)) Output clamp(0, 4, n) Code body(get_slice_size(n = 20)) Output clamp(0, 20, n) Code body(get_slice_size(n = 20, allow_outsize = TRUE)) Output [1] 20 Code body(get_slice_size(n = -4)) Output clamp(0, ceiling(n + -4), n) Code body(get_slice_size(n = -20)) Output clamp(0, ceiling(n + -20), n) # n must be an integer Code slice_head(df, n = 1.1) Condition Error in `slice_head()`: ! `n` must be a round number, not the number 1.1. # slice_*() checks that `n=` is explicitly named and ... is empty Code slice_head(df, 5) Condition Error in `slice_head()`: ! `n` must be explicitly named. i Did you mean `slice_head(n = 5)`? Code slice_tail(df, 5) Condition Error in `slice_tail()`: ! `n` must be explicitly named. i Did you mean `slice_tail(n = 5)`? Code slice_min(df, x, 5) Condition Error in `slice_min()`: ! `n` must be explicitly named. i Did you mean `slice_min(n = 5)`? Code slice_max(df, x, 5) Condition Error in `slice_max()`: ! 
`n` must be explicitly named. i Did you mean `slice_max(n = 5)`? Code slice_sample(df, 5) Condition Error in `slice_sample()`: ! `n` must be explicitly named. i Did you mean `slice_sample(n = 5)`? --- Code dplyr::slice_head(df, 5) Condition Error in `dplyr::slice_head()`: ! `n` must be explicitly named. i Did you mean `dplyr::slice_head(n = 5)`? Code dplyr::slice_tail(df, 5) Condition Error in `dplyr::slice_tail()`: ! `n` must be explicitly named. i Did you mean `dplyr::slice_tail(n = 5)`? Code dplyr::slice_min(df, x, 5) Condition Error in `dplyr::slice_min()`: ! `n` must be explicitly named. i Did you mean `dplyr::slice_min(n = 5)`? Code dplyr::slice_max(df, x, 5) Condition Error in `dplyr::slice_max()`: ! `n` must be explicitly named. i Did you mean `dplyr::slice_max(n = 5)`? Code dplyr::slice_sample(df, 5) Condition Error in `dplyr::slice_sample()`: ! `n` must be explicitly named. i Did you mean `dplyr::slice_sample(n = 5)`? --- Code slice_head(df, 5, 2) Condition Error in `slice_head()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code slice_tail(df, 5, 2) Condition Error in `slice_tail()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code slice_min(df, x, 5, 2) Condition Error in `slice_min()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code slice_max(df, x, 5, 2) Condition Error in `slice_max()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? Code slice_sample(df, 5, 2) Condition Error in `slice_sample()`: ! `...` must be empty. x Problematic arguments: * ..1 = 5 * ..2 = 2 i Did you forget to name an argument? # slice_helper `by` errors use correct error context and correct `by_arg` Code slice_head(gdf, n = 1, by = x) Condition Error in `slice_head()`: ! Can't supply `by` when `.data` is a grouped data frame. 
Code slice_tail(gdf, n = 1, by = x) Condition Error in `slice_tail()`: ! Can't supply `by` when `.data` is a grouped data frame. Code slice_min(gdf, order_by = x, by = x) Condition Error in `slice_min()`: ! Can't supply `by` when `.data` is a grouped data frame. Code slice_max(gdf, order_by = x, by = x) Condition Error in `slice_max()`: ! Can't supply `by` when `.data` is a grouped data frame. Code slice_sample(gdf, n = 1, by = x) Condition Error in `slice_sample()`: ! Can't supply `by` when `.data` is a grouped data frame. # slice_helper catches `.by` typo (#6647) Code slice_head(df, n = 1, .by = x) Condition Error in `slice_head()`: ! Can't specify an argument named `.by` in this verb. i Did you mean to use `by` instead? Code slice_tail(df, n = 1, .by = x) Condition Error in `slice_tail()`: ! Can't specify an argument named `.by` in this verb. i Did you mean to use `by` instead? Code slice_min(df, order_by = x, .by = x) Condition Error in `slice_min()`: ! Can't specify an argument named `.by` in this verb. i Did you mean to use `by` instead? Code slice_max(df, order_by = x, .by = x) Condition Error in `slice_max()`: ! Can't specify an argument named `.by` in this verb. i Did you mean to use `by` instead? Code slice_sample(df, n = 1, .by = x) Condition Error in `slice_sample()`: ! Can't specify an argument named `.by` in this verb. i Did you mean to use `by` instead? # slice_min/max() check size of `order_by=` (#5922) Code slice_min(data.frame(x = 1:10), 1:6) Condition Error in `slice_min()`: ! Can't compute indices. Caused by error: ! `order_by` must have size 10, not size 6. Code slice_max(data.frame(x = 1:10), 1:6) Condition Error in `slice_max()`: ! Can't compute indices. Caused by error: ! `order_by` must have size 10, not size 6. # slice_min/max() validate simple arguments Code slice_min(data.frame(x = 1:10)) Condition Error in `slice_min()`: ! `order_by` is absent but must be supplied. Code slice_max(data.frame(x = 1:10)) Condition Error in `slice_max()`: ! 
`order_by` is absent but must be supplied. Code slice_min(data.frame(x = 1:10), x, with_ties = 1) Condition Error in `slice_min()`: ! `with_ties` must be `TRUE` or `FALSE`, not the number 1. Code slice_max(data.frame(x = 1:10), x, with_ties = 1) Condition Error in `slice_max()`: ! `with_ties` must be `TRUE` or `FALSE`, not the number 1. Code slice_min(data.frame(x = 1:10), x, na_rm = 1) Condition Error in `slice_min()`: ! `na_rm` must be `TRUE` or `FALSE`, not the number 1. Code slice_max(data.frame(x = 1:10), x, na_rm = 1) Condition Error in `slice_max()`: ! `na_rm` must be `TRUE` or `FALSE`, not the number 1. # slice_sample() checks size of `weight_by=` (#5922) Code slice_sample(df, n = 2, weight_by = 1:6) Condition Error in `slice_sample()`: ! Can't compute indices. Caused by error: ! `weight_by` must have size 10, not size 6. # `slice_sample()` validates `replace` Code slice_sample(df, replace = 1) Condition Error in `slice_sample()`: ! `replace` must be `TRUE` or `FALSE`, not the number 1. Code slice_sample(df, replace = NA) Condition Error in `slice_sample()`: ! `replace` must be `TRUE` or `FALSE`, not `NA`. dplyr/tests/testthat/_snaps/relocate.md0000644000176200001440000000030014416000535020000 0ustar liggesusers# can only supply one of .before and .after Code relocate(df, .before = 1, .after = 1) Condition Error in `relocate()`: ! Can't supply both `.before` and `.after`. dplyr/tests/testthat/_snaps/bind-cols.md0000644000176200001440000000151614416000507020065 0ustar liggesusers# bind_cols() repairs names Code bound <- bind_cols(df, df) Message New names: * `a` -> `a...1` * `b` -> `b...2` * `a` -> `a...3` * `b` -> `b...4` # bind_cols() handles unnamed list with name repair (#3402) Code df <- bind_cols(list(1, 2)) Message New names: * `` -> `...1` * `` -> `...2` # bind_cols() gives informative errors Code # # incompatible size (expect_error(bind_cols(a = 1:2, mtcars))) Output Error in `bind_cols()`: ! Can't recycle `a` (size 2) to match `..2` (size 32). 
Code (expect_error(bind_cols(mtcars, a = 1:3))) Output Error in `bind_cols()`: ! Can't recycle `..1` (size 32) to match `a` (size 3). dplyr/tests/testthat/_snaps/deprec-lazyeval.md0000644000176200001440000000236214472225345021315 0ustar liggesusers# mutate_each() and mutate_each_() are deprecated (#6869) Code mutate_each(df, list(~ .x + 1L)) Condition Warning: `mutate_each()` was deprecated in dplyr 0.7.0. i Please use `across()` instead. Output # A tibble: 2 x 2 x y 1 2 4 2 3 5 --- Code mutate_each_(df, list(~ .x + 1L), c("x", "y")) Condition Warning: `mutate_each_()` was deprecated in dplyr 0.7.0. i Please use `across()` instead. Output # A tibble: 2 x 2 x y 1 2 4 2 3 5 # summarise_each() and summarise_each_() are deprecated (#6869) Code summarise_each(df, list(mean)) Condition Warning: `summarise_each()` was deprecated in dplyr 0.7.0. i Please use `across()` instead. Output # A tibble: 1 x 2 x y 1 1.5 3.5 --- Code summarise_each_(df, list(mean), c("x", "y")) Condition Warning: `summarise_each_()` was deprecated in dplyr 0.7.0. i Please use `across()` instead. Output # A tibble: 1 x 2 x y 1 1.5 3.5 dplyr/tests/testthat/_snaps/desc.md0000644000176200001440000000022114416000517017122 0ustar liggesusers# errors cleanly on non-vectors Code desc(mean) Condition Error in `desc()`: ! `x` must be a vector, not a function. dplyr/tests/testthat/_snaps/order-by.md0000644000176200001440000000153114416000533017732 0ustar liggesusers# order_by() gives useful error messages Code (expect_error(order_by(mtcars, 10))) Output Error in `order_by()`: ! `call` must be a function call, not the number 10. Code (expect_error(order_by(mtcars, cyl))) Output Error in `order_by()`: ! `call` must be a function call, not a symbol. i Did you mean `arrange(mtcars, cyl)`? # `with_order()` requires `order_by` and `x` to be the same size Code with_order(1:2, identity, 1:3) Condition Error in `with_order()`: ! `order_by` must have size 3, not size 2. 
# order_by() give meaningful errors Code (expect_error(order_by(NULL, 1L))) Output Error in `order_by()`: ! `call` must be a function call, not the number 1. dplyr/tests/testthat/_snaps/bind-rows.md0000644000176200001440000000263014416000507020115 0ustar liggesusers# bind_rows() only flattens S3 lists that inherit from list (#3924) Code bind_rows(lst1) Condition Error in `bind_rows()`: ! Argument 1 must be a data frame or a named atomic vector. # bind_rows() validates lists (#5417) Code bind_rows(list(x = 1), list(x = 1:3, y = 1:2)) Condition Error in `vctrs::data_frame()`: ! Can't recycle `x` (size 3) to match `y` (size 2). # bind_rows() give informative errors Code # invalid .id df1 <- tibble(x = 1:3) df2 <- tibble(x = 4:6) (expect_error(bind_rows(df1, df2, .id = 5))) Output Error in `bind_rows()`: ! `.id` must be a single string, not the number 5. Code # invalid type ll <- list(tibble(a = 1:5), env(a = 1)) (expect_error(bind_rows(ll))) Output Error in `bind_rows()`: ! Argument 2 must be a data frame or a named atomic vector. Code df1 <- tibble(a = factor("a")) df2 <- tibble(a = 1L) (expect_error(bind_rows(df1, df2))) Output Error in `bind_rows()`: ! Can't combine `..1$a` > and `..2$a` . Code # unnamed vectors (expect_error(bind_rows(1:2))) Output Error in `bind_rows()`: ! Argument 1 must be a data frame or a named atomic vector. dplyr/tests/testthat/_snaps/group-map.md0000644000176200001440000000175114416000522020120 0ustar liggesusers# group_map() give meaningful errors Code (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~ data.frame(cyl = 19))) ) Output Error in `group_modify()`: ! The returned data frame cannot contain the original grouping variables: cyl. Code (expect_error(mtcars %>% group_by(cyl) %>% group_modify(~10))) Output Error in `group_modify()`: ! The result of `.f` must be a data frame. Code (expect_error(iris %>% group_by(Species) %>% group_modify(head1))) Output Error in `group_modify()`: ! `.f` must accept at least two arguments. 
i You can use `...` to absorb unused components. Code (expect_error(iris %>% group_by(Species) %>% group_map(head1))) Output Error in `group_map()`: ! `.f` must accept at least two arguments. i You can use `...` to absorb unused components. dplyr/tests/testthat/_snaps/funs.md0000644000176200001440000000142114416000521017155 0ustar liggesusers# takes the common type between all inputs (#6478) Code between("1", 2, 3) Condition Error in `between()`: ! Can't combine `x` and `left` . --- Code between(1, "2", 3) Condition Error in `between()`: ! Can't combine `x` and `left` . --- Code between(1, 2, "3") Condition Error in `between()`: ! Can't combine `x` and `right` . # recycles `left` and `right` to the size of `x` Code between(1:3, 1:2, 1L) Condition Error in `between()`: ! Can't recycle `left` (size 2) to size 3. --- Code between(1:3, 1L, 1:2) Condition Error in `between()`: ! Can't recycle `right` (size 2) to size 3. dplyr/tests/testthat/_snaps/top-n.md0000644000176200001440000000062514416000544017251 0ustar liggesusers# top_n() quotes n Code res1 <- top_n(mtcars, n() * 0.5) Message Selecting by carb --- Code res2 <- top_n(mtcars, 16) Message Selecting by carb # top_frac() is a shorthand for top_n(n()*) Code res1 <- top_n(mtcars, n() * 0.5) Message Selecting by carb --- Code res2 <- top_frac(mtcars, 0.5) Message Selecting by carb dplyr/tests/testthat/_snaps/recode.md0000644000176200001440000000257714416000535017465 0ustar liggesusers# `recode()` signals that it is superseded Code catch_cnd(recode("a", a = "A")) Output # `recode_factor()` signals that it is superseded Code catch_cnd(recode_factor("a", a = "A")) Output # recode() gives meaningful error messages Code (expect_error(recode(factor("a"), a = 5, .missing = 10))) Output Error in `recode()`: ! `.missing` is not supported for factors. Code (expect_error(recode("a", b = 5, "c"))) Output Error in `recode()`: ! Argument 3 must be named. 
Code (expect_error(recode(factor("a"), b = 5, "c"))) Output Error in `recode()`: ! Argument 3 must be named. Code (expect_error(recode(1:5))) Output Error in `recode()`: ! No replacements provided. Code (expect_error(recode("a"))) Output Error in `recode()`: ! No replacements provided. Code (expect_error(recode(factor("a")))) Output Error in `recode()`: ! No replacements provided. dplyr/tests/testthat/_snaps/deprec-do.md0000644000176200001440000000155114416000516020054 0ustar liggesusers# do() gives meaningful error messages Code (expect_error(df %>% do(head, tail))) Output Error in `do()`: ! Can only supply one unnamed argument, not 2. Code (expect_error(df %>% ungroup() %>% do(1))) Output Error in `do()`: ! Result must be a data frame, not numeric. Code (expect_error(df %>% do(1))) Output Error in `do()`: ! Results 1, 2, 3 must be data frames, not numeric. Code (expect_error(df %>% do("a"))) Output Error in `do()`: ! Results 1, 2, 3 must be data frames, not character. Code (expect_error(df %>% do(x = 1, 2))) Output Error in `do()`: ! Arguments must either be all named or all unnamed. dplyr/tests/testthat/_snaps/nth-value.md0000644000176200001440000000306514416000533020116 0ustar liggesusers# `na_rm` is validated Code nth(1, 1, na_rm = 1) Condition Error in `nth()`: ! `na_rm` must be `TRUE` or `FALSE`, not the number 1. --- Code nth(1, 1, na_rm = c(TRUE, FALSE)) Condition Error in `nth()`: ! `na_rm` must be `TRUE` or `FALSE`, not a logical vector. # `default` must be size 1 (when not used with lists) Code nth(1L, n = 2L, default = 1:2) Condition Error in `nth()`: ! `default` must have size 1, not size 2. # `default` is cast to the type of `x` (when not used with lists) Code nth("x", 2, default = 2) Condition Error in `nth()`: ! Can't convert `default` to match type of `x` . # `n` is validated (#5466) Code nth(1:10, n = "x") Condition Error in `nth()`: ! Can't convert `n` to . --- Code nth(1:10, n = 1:2) Condition Error in `nth()`: ! 
`n` must have size 1, not size 2. --- Code nth(1:10, n = NA_integer_) Condition Error in `nth()`: ! `n` can't be `NA`. # `x` must be a vector Code nth(environment(), 1L) Condition Error in `vec_size()`: ! `x` must be a vector, not an environment. # `order_by` must be the same size as `x` Code nth(1:5, n = 1L, order_by = 1:2) Condition Error in `nth()`: ! `order_by` must have size 5, not size 2. --- Code nth(1:5, n = 6L, order_by = 1:2) Condition Error in `nth()`: ! `order_by` must have size 5, not size 2. dplyr/tests/testthat/_snaps/select.md0000644000176200001440000000452614416000540017473 0ustar liggesusers# grouping variables preserved with a message, unless already selected (#1511, #5841) Code res <- select(df, x) Message Adding missing grouping variables: `g` --- Code expect_equal(df %>% select(a = c), tibble(b = 2, a = 3) %>% group_by(b)) Message Adding missing grouping variables: `b` Code expect_equal(df %>% select(b = c), tibble(a = 1, b = 3) %>% group_by(a)) Message Adding missing grouping variables: `a` # non-syntactic grouping variable is preserved (#1138) Code df <- tibble(`a b` = 1L) %>% group_by(`a b`) %>% select() Message Adding missing grouping variables: `a b` # select() provides informative errors Code (expect_error(select(mtcars, 1 + ""))) Output Error in `select()`: ! Problem while evaluating `1 + ""`. Caused by error in `1 + ""`: ! non-numeric argument to binary operator # dplyr_col_select() aborts when `[` implementation is broken Code (expect_error(select(df1, 1:2))) Output Error in `select()`: ! Can't subset columns past the end. i Location 2 doesn't exist. i There is only 1 column. Code (expect_error(select(df1, 0))) Output Error in `select()`: ! Can't reconstruct data frame. x The `[` method for class must return a data frame. i It returned a . --- Code (expect_error(select(df1, 2))) Output Error in `select()`: ! Can't subset columns past the end. i Location 2 doesn't exist. i There is only 1 column. 
Code (expect_error(select(df1, 1))) Output Error in `select()`: ! Can't reconstruct data frame. x The `[` method for class must return a data frame. i It returned a . Code (expect_error(select(df2, 1))) Output Error in `select()`: ! Can't reconstruct data frame. x The `[` method for class must return a data frame with 1 column. i It returned a of 0 columns. dplyr/tests/testthat/_snaps/deprec-context.md0000644000176200001440000000237514416000516021143 0ustar liggesusers# cur_data() is soft deprecated Code mutate(df, y = cur_data()) Condition Warning: There was 1 warning in `mutate()`. i In argument: `y = cur_data()`. Caused by warning: ! `cur_data()` was deprecated in dplyr 1.1.0. i Please use `pick()` instead. Output # A tibble: 1 x 2 x y$x 1 1 1 # cur_data_all() is soft deprecated Code mutate(df, y = cur_data_all()) Condition Warning: There was 1 warning in `mutate()`. i In argument: `y = cur_data_all()`. Caused by warning: ! `cur_data_all()` was deprecated in dplyr 1.1.0. i Please use `pick()` instead. Output # A tibble: 1 x 2 x y$x 1 1 1 # give useful error messages when not applicable Code (expect_error(cur_data())) Output Error in `cur_data()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. Code (expect_error(cur_data_all())) Output Error in `cur_data_all()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. dplyr/tests/testthat/_snaps/reframe.md0000644000176200001440000000304614416000535017635 0ustar liggesusers# `reframe()` throws intelligent recycling errors Code reframe(df, x = 1:2, y = 3:5) Condition Error in `reframe()`: ! Can't recycle `y = 3:5`. Caused by error: ! `y` must be size 2 or 1, not 3. i An earlier column had size 2. --- Code reframe(df, x = 1:2, y = 3:5, .by = g) Condition Error in `reframe()`: ! Can't recycle `y = 3:5`. i In group 1: `g = 1`. Caused by error: ! `y` must be size 2 or 1, not 3. i An earlier column had size 2. 
--- Code reframe(gdf, x = 1:2, y = 3:5) Condition Error in `reframe()`: ! Can't recycle `y = 3:5`. i In group 1: `g = 1`. Caused by error: ! `y` must be size 2 or 1, not 3. i An earlier column had size 2. # `reframe()` doesn't message about regrouping when multiple group columns are supplied Code out <- reframe(df, x = mean(x), .by = c(a, b)) --- Code out <- reframe(gdf, x = mean(x)) # `reframe()` doesn't message about regrouping when >1 rows are returned per group Code out <- reframe(df, x = vec_rep_each(x, x), .by = g) --- Code out <- reframe(gdf, x = vec_rep_each(x, x)) # catches `.by` with grouped-df Code reframe(gdf, .by = x) Condition Error in `reframe()`: ! Can't supply `.by` when `.data` is a grouped data frame. # catches `.by` with rowwise-df Code reframe(rdf, .by = x) Condition Error in `reframe()`: ! Can't supply `.by` when `.data` is a rowwise data frame. dplyr/tests/testthat/_snaps/grouped-df.md0000644000176200001440000000631214416000523020244 0ustar liggesusers# validate_grouped_df() gives useful errors Code (expect_error(validate_grouped_df(df1))) Output Error in `validate_grouped_df()`: ! The `.rows` column must be list of one-based integer vectors. Code (expect_error(group_data(df1))) Output Error in `group_data()`: ! `.data` must be a valid object. Caused by error in `validate_grouped_df()`: ! The `.rows` column must be list of one-based integer vectors. Code (expect_error(validate_grouped_df(df2))) Output Error in `validate_grouped_df()`: ! The last column of the `groups` attribute must be called `.rows`. Code (expect_error(validate_grouped_df(df2))) Output Error in `validate_grouped_df()`: ! The last column of the `groups` attribute must be called `.rows`. Code (expect_error(validate_grouped_df(df3))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_grouped_df(df4))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. 
Code (expect_error(validate_grouped_df(df5))) Output Error in `validate_grouped_df()`: ! Corrupt `grouped_df` using old (< 0.8.0) format. i Strip off old grouping with `ungroup()`. Code (expect_error(validate_grouped_df(df6, check_bounds = TRUE))) Output Error in `validate_grouped_df()`: ! out of bounds indices. Code (expect_error(validate_grouped_df(df7, check_bounds = TRUE))) Output Error in `validate_grouped_df()`: ! out of bounds indices. Code (expect_error(validate_grouped_df(df8, check_bounds = TRUE))) Output Error in `validate_grouped_df()`: ! out of bounds indices. Code (expect_error(validate_grouped_df(df10))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(validate_grouped_df(df11))) Output Error in `validate_grouped_df()`: ! The `groups` attribute must be a data frame. Code (expect_error(new_grouped_df(tibble(x = 1:10), tibble(other = list(1:2))))) Output Error in `new_grouped_df()`: ! The last column of `groups` must be called ".rows". Code (expect_error(new_grouped_df(10))) Output Error in `new_grouped_df()`: ! `x` must be a data frame. # helper gives meaningful error messages Code (expect_error(grouped_df(data.frame(x = 1), "y", FALSE))) Output Error in `compute_groups()`: ! `vars` missing from `data`: `y`. Code (expect_error(grouped_df(data.frame(x = 1), 1))) Output Error in `grouped_df()`: ! `vars` must be a character vector. dplyr/tests/testthat/_snaps/colwise-select.md0000644000176200001440000000333514416000511021131 0ustar liggesusers# colwise select() / rename() give meaningful errors Code df <- tibble(x = 0L, y = 0.5, z = 1) (expect_error(df %>% rename_all())) Output Error in `rename_all()`: ! `.funs` must specify a renaming function. Code (expect_error(df %>% rename_if(is_integerish))) Output Error in `rename_if()`: ! `.funs` must specify a renaming function. Code (expect_error(df %>% rename_at(vars(x:y)))) Output Error in `rename_at()`: ! `.funs` must specify a renaming function. 
Code (expect_error(df %>% rename_all(list(tolower, toupper)))) Output Error in `rename_all()`: ! `.funs` must contain one renaming function, not 2. Code (expect_error(df %>% select_all(list(tolower, toupper)))) Output Error in `select_all()`: ! `.funs` must contain one renaming function, not 2. Code (expect_error(df %>% select_if(function(.x) 1))) Output Error in `select_if()`: ! `.p` is invalid. x `.p` should return a single logical. i `.p` returns a for column `x`. Code (expect_error(df %>% select_if(function(.x) c(TRUE, TRUE)))) Output Error in `select_if()`: ! `.p` is invalid. x `.p` should return a single logical. i `.p` returns a size 2 for column `x`. Code (expect_error(data.frame() %>% select_all(.funs = 42))) Output Error in `select_all()`: ! `.funs` must be a one sided formula, a function, or a function name. dplyr/tests/testthat/_snaps/context.md0000644000176200001440000000234714416000514017700 0ustar liggesusers# give useful error messages when not applicable Code (expect_error(n())) Output Error in `n()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. Code (expect_error(cur_column())) Output Error in `cur_column()`: ! Must only be used inside `across()`. Code (expect_error(cur_group())) Output Error in `cur_group()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. Code (expect_error(cur_group_id())) Output Error in `cur_group_id()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. Code (expect_error(cur_group_rows())) Output Error in `cur_group_rows()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. 
# group labels are correctly formatted Code group_labels_details(c(a = 1)) Output [1] "`a = 1`" Code group_labels_details(c(a = 1, b = 2)) Output [1] "`a = 1`, `b = 2`" dplyr/tests/testthat/_snaps/deprec-combine.md0000644000176200001440000000115214416000727021067 0ustar liggesusers# combine() is deprecated Code combine() Condition Warning: `combine()` was deprecated in dplyr 1.0.0. i Please use `vctrs::vec_c()` instead. Output logical(0) # combine() gives meaningful error messages Code (expect_error(combine("a", 1))) Output Error in `vec_c()`: ! Can't combine `..1` and `..2` . Code (expect_error(combine(factor("a"), 1L))) Output Error in `vec_c()`: ! Can't combine `..1` > and `..2` . dplyr/tests/testthat/_snaps/case-when.md0000644000176200001440000000624714416000510020065 0ustar liggesusers# `.default` isn't part of recycling Code case_when(FALSE ~ 1L, .default = 2:5) Condition Error in `case_when()`: ! `.default` must have size 1, not size 4. # `.default` is part of common type computation Code case_when(TRUE ~ 1L, .default = "x") Condition Error in `case_when()`: ! Can't combine `..1 (right)` and `.default` . # passes through `.size` correctly Code case_when(TRUE ~ 1:2, .size = 3) Condition Error in `case_when()`: ! Can't recycle `..1 (right)` (size 2) to size 3. # invalid type errors are correct (#6261) (#6206) Code case_when(TRUE ~ 1, TRUE ~ "x") Condition Error in `case_when()`: ! Can't combine `..1 (right)` and `..2 (right)` . # `NULL` formula element throws meaningful error Code case_when(1 ~ NULL) Condition Error in `case_when()`: ! `..1 (right)` must be a vector, not `NULL`. --- Code case_when(NULL ~ 1) Condition Error in `case_when()`: ! `..1 (left)` must be a logical vector, not `NULL`. # throws chained errors when formula evaluation fails Code case_when(1 ~ 2, 3 ~ stop("oh no!")) Condition Error in `case_when()`: ! Failed to evaluate the right-hand side of formula 2. Caused by error: ! oh no! 
--- Code case_when(1 ~ 2, stop("oh no!") ~ 4) Condition Error in `case_when()`: ! Failed to evaluate the left-hand side of formula 2. Caused by error: ! oh no! # case_when() give meaningful errors Code (expect_error(case_when(c(TRUE, FALSE) ~ 1:3, c(FALSE, TRUE) ~ 1:2))) Output Error in `case_when()`: ! Can't recycle `..1 (left)` (size 2) to match `..1 (right)` (size 3). Code (expect_error(case_when(c(TRUE, FALSE) ~ 1, c(FALSE, TRUE, FALSE) ~ 2, c(FALSE, TRUE, FALSE, NA) ~ 3))) Output Error in `case_when()`: ! Can't recycle `..1 (left)` (size 2) to match `..2 (left)` (size 3). Code (expect_error(case_when(50 ~ 1:3))) Output Error in `case_when()`: ! `..1 (left)` must be a logical vector, not a double vector. Code (expect_error(case_when(paste(50)))) Output Error in `case_when()`: ! Case 1 (`paste(50)`) must be a two-sided formula, not the string "50". Code (expect_error(case_when(y ~ x, paste(50)))) Output Error in `case_when()`: ! Case 2 (`paste(50)`) must be a two-sided formula, not the string "50". Code (expect_error(case_when())) Output Error in `case_when()`: ! At least one condition must be supplied. Code (expect_error(case_when(NULL))) Output Error in `case_when()`: ! At least one condition must be supplied. Code (expect_error(case_when(~ 1:2))) Output Error in `case_when()`: ! Case 1 (`~1:2`) must be a two-sided formula. dplyr/tests/testthat/_snaps/join.md0000644000176200001440000001623214416000530017147 0ustar liggesusers# can't use `keep = FALSE` with non-equi conditions (#6499) Code left_join(df1, df2, join_by(overlaps(xl, xu, yl, yu)), keep = FALSE) Condition Error in `left_join()`: ! Can't set `keep = FALSE` when using an inequality, rolling, or overlap join. --- Code full_join(df1, df2, join_by(overlaps(xl, xu, yl, yu)), keep = FALSE) Condition Error in `full_join()`: ! Can't set `keep = FALSE` when using an inequality, rolling, or overlap join. # join_mutate() validates arguments Code join_mutate(df, df, by = 1, type = "left") Condition Error: ! 
`by` must be a (named) character vector, list, `join_by()` result, or NULL, not the number 1. Code join_mutate(df, df, by = "x", type = "left", suffix = 1) Condition Error: ! `suffix` must be a character vector of length 2, not the number 1 of length 1. Code join_mutate(df, df, by = "x", type = "left", na_matches = "foo") Condition Error: ! `na_matches` must be one of "na" or "never", not "foo". Code join_mutate(df, df, by = "x", type = "left", keep = 1) Condition Error: ! `keep` must be `TRUE`, `FALSE`, or `NULL`, not the number 1. # join_filter() validates arguments Code join_filter(df, df, by = 1, type = "semi") Condition Error: ! `by` must be a (named) character vector, list, `join_by()` result, or NULL, not the number 1. Code join_filter(df, df, by = "x", type = "semi", na_matches = "foo") Condition Error: ! `na_matches` must be one of "na" or "never", not "foo". # mutating joins trigger many-to-many warning Code out <- left_join(df, df, join_by(x)) Condition Warning in `left_join()`: Detected an unexpected many-to-many relationship between `x` and `y`. i Row 1 of `x` matches multiple rows in `y`. i Row 1 of `y` matches multiple rows in `x`. i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning. # mutating joins compute common columns Code out <- left_join(df1, df2) Message Joining with `by = join_by(x)` # filtering joins compute common columns Code out <- semi_join(df1, df2) Message Joining with `by = join_by(x)` # mutating joins reference original column in `y` when there are type errors (#6465) Code (expect_error(left_join(x, y, by = join_by(a == b)))) Output Error in `left_join()`: ! Can't join `x$a` with `y$b` due to incompatible types. i `x$a` is a . i `y$b` is a . # filtering joins reference original column in `y` when there are type errors (#6465) Code (expect_error(semi_join(x, y, by = join_by(a == b)))) Output Error in `semi_join()`: ! Can't join `x$a` with `y$b` due to incompatible types. 
i `x$a` is a . i `y$b` is a . # error if passed additional arguments Code inner_join(df1, df2, on = "a") Condition Error in `inner_join()`: ! `...` must be empty. x Problematic argument: * on = "a" Code left_join(df1, df2, on = "a") Condition Error in `left_join()`: ! `...` must be empty. x Problematic argument: * on = "a" Code right_join(df1, df2, on = "a") Condition Error in `right_join()`: ! `...` must be empty. x Problematic argument: * on = "a" Code full_join(df1, df2, on = "a") Condition Error in `full_join()`: ! `...` must be empty. x Problematic argument: * on = "a" Code nest_join(df1, df2, on = "a") Condition Error in `nest_join()`: ! `...` must be empty. x Problematic argument: * on = "a" Code anti_join(df1, df2, on = "a") Condition Error in `anti_join()`: ! `...` must be empty. x Problematic argument: * on = "a" Code semi_join(df1, df2, on = "a") Condition Error in `semi_join()`: ! `...` must be empty. x Problematic argument: * on = "a" # nest_join computes common columns Code out <- nest_join(df1, df2) Message Joining with `by = join_by(x)` # nest_join references original column in `y` when there are type errors (#6465) Code (expect_error(nest_join(x, y, by = join_by(a == b)))) Output Error in `nest_join()`: ! Can't join `x$a` with `y$b` due to incompatible types. i `x$a` is a . i `y$b` is a . # validates inputs Code nest_join(df1, df2, by = 1) Condition Error in `nest_join()`: ! `by` must be a (named) character vector, list, `join_by()` result, or NULL, not the number 1. Code nest_join(df1, df2, keep = 1) Condition Error in `nest_join()`: ! `keep` must be `TRUE`, `FALSE`, or `NULL`, not the number 1. Code nest_join(df1, df2, name = 1) Condition Error in `nest_join()`: ! `name` must be a single string, not the number 1. Code nest_join(df1, df2, na_matches = 1) Condition Error in `nest_join()`: ! `na_matches` must be a string or character vector. 
# `by = character()` technically respects `unmatched` Code left_join(df1, df2, by = character(), unmatched = "error") Condition Error in `left_join()`: ! Each row of `y` must be matched by `x`. i Row 1 of `y` was not matched. # `by = character()` technically respects `relationship` Code left_join(df, df, by = character(), relationship = "many-to-one") Condition Error in `left_join()`: ! Each row in `x` must match at most 1 row in `y`. i Row 1 of `x` matches multiple rows in `y`. # `by = character()` for a cross join is deprecated (#6604) Code out <- left_join(df1, df2, by = character()) Condition Warning: Using `by = character()` to perform a cross join was deprecated in dplyr 1.1.0. i Please use `cross_join()` instead. --- Code out <- semi_join(df1, df2, by = character()) Condition Warning: Using `by = character()` to perform a cross join was deprecated in dplyr 1.1.0. i Please use `cross_join()` instead. --- Code out <- nest_join(df1, df2, by = character()) Condition Warning: Using `by = character()` to perform a cross join was deprecated in dplyr 1.1.0. i Please use `cross_join()` instead. # `by = named character()` for a cross join works Code out <- left_join(df1, df2, by = by) Condition Warning: Using `by = character()` to perform a cross join was deprecated in dplyr 1.1.0. i Please use `cross_join()` instead. # `by = list(x = character(), y = character())` for a cross join is deprecated (#6604) Code out <- left_join(df1, df2, by = list(x = character(), y = character())) Condition Warning: Using `by = character()` to perform a cross join was deprecated in dplyr 1.1.0. i Please use `cross_join()` instead. dplyr/tests/testthat/_snaps/across.md0000644000176200001440000003716114416000505017510 0ustar liggesusers# across(.unpack =) errors if the unpacked data frame has non-unique names Code mutate(df, across(x:y, fn, .unpack = "{outer}")) Condition Error in `mutate()`: i In argument: `across(x:y, fn, .unpack = "{outer}")`. Caused by error in `across()`: ! 
Names must be unique. x These names are duplicated: * "x" at locations 1 and 2. * "y" at locations 3 and 4. # `.unpack` is validated Code summarise(df, across(x, mean, .unpack = 1)) Condition Error in `summarise()`: i In argument: `across(x, mean, .unpack = 1)`. Caused by error in `across()`: ! `.unpack` must be `TRUE`, `FALSE`, or a single string, not the number 1. --- Code summarise(df, across(x, mean, .unpack = c("x", "y"))) Condition Error in `summarise()`: i In argument: `across(x, mean, .unpack = c("x", "y"))`. Caused by error in `across()`: ! `.unpack` must be `TRUE`, `FALSE`, or a single string, not a character vector. --- Code summarise(df, across(x, mean, .unpack = NA)) Condition Error in `summarise()`: i In argument: `across(x, mean, .unpack = NA)`. Caused by error in `across()`: ! `.unpack` must be `TRUE`, `FALSE`, or a single string, not `NA`. # across() throws meaningful error with failure during expansion (#6534) Code summarise(df, across(everything(), fn())) Condition Error in `summarise()`: i In argument: `across(everything(), fn())`. Caused by error in `fn()`: ! oh no! --- Code summarise(df, across(everything(), fn()), .by = g) Condition Error in `summarise()`: i In argument: `across(everything(), fn())`. Caused by error in `fn()`: ! oh no! --- Code summarise(gdf, across(everything(), fn())) Condition Error in `summarise()`: i In argument: `across(everything(), fn())`. Caused by error in `fn()`: ! oh no! # across() gives meaningful messages Code (expect_error(tibble(x = 1) %>% summarise(across(where(is.numeric), 42)))) Output Error in `summarise()`: i In argument: `across(where(is.numeric), 42)`. Caused by error in `across()`: ! `.fns` must be a function, a formula, or a list of functions/formulas. Code (expect_error(tibble(x = 1) %>% summarise(across(y, mean)))) Output Error in `summarise()`: i In argument: `across(y, mean)`. Caused by error in `across()`: ! Can't subset columns that don't exist. x Column `y` doesn't exist. 
Code (expect_error(tibble(x = 1) %>% summarise(res = across(where(is.numeric), 42)))) Output Error in `summarise()`: i In argument: `res = across(where(is.numeric), 42)`. Caused by error in `across()`: ! `.fns` must be a function, a formula, or a list of functions/formulas. Code (expect_error(tibble(x = 1) %>% summarise(z = across(y, mean)))) Output Error in `summarise()`: i In argument: `z = across(y, mean)`. Caused by error in `across()`: ! Can't subset columns that don't exist. x Column `y` doesn't exist. Code (expect_error(tibble(x = 1) %>% summarise(res = sum(if_any(where(is.numeric), 42))))) Output Error in `summarise()`: i In argument: `res = sum(if_any(where(is.numeric), 42))`. Caused by error in `if_any()`: ! `.fns` must be a function, a formula, or a list of functions/formulas. Code (expect_error(tibble(x = 1) %>% summarise(res = sum(if_all(~ mean(.x)))))) Output Error in `summarise()`: i In argument: `res = sum(if_all(~mean(.x)))`. Caused by error in `if_all()`: ! Must supply a column selection. i You most likely meant: `if_all(everything(), ~mean(.x))`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(tibble(x = 1) %>% summarise(res = sum(if_any(~ mean(.x)))))) Output Error in `summarise()`: i In argument: `res = sum(if_any(~mean(.x)))`. Caused by error in `if_any()`: ! Must supply a column selection. i You most likely meant: `if_any(everything(), ~mean(.x))`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(across())) Output Error in `across()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. Code (expect_error(c_across())) Output Error in `c_across()`: ! Must only be used inside data-masking verbs like `mutate()`, `filter()`, and `group_by()`. Code error_fn <- (function(.) { if (all(. 
> 10)) { rlang::abort("too small", call = call("error_fn")) } else { 42 } }) (expect_error(tibble(x = 1:10, y = 11:20) %>% summarise(across(everything(), error_fn)))) Output Error in `summarise()`: i In argument: `across(everything(), error_fn)`. Caused by error in `across()`: ! Can't compute column `y`. Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1:10, y = 11:20) %>% mutate(across(everything(), error_fn)))) Output Error in `mutate()`: i In argument: `across(everything(), error_fn)`. Caused by error in `across()`: ! Can't compute column `y`. Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1:10, y = 11:20) %>% summarise(force(across(everything(), error_fn))))) Output Error in `summarise()`: i In argument: `force(across(everything(), error_fn))`. Caused by error in `across()`: ! Can't compute column `y`. Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1:10, y = 11:20) %>% mutate(force(across(everything(), error_fn))))) Output Error in `mutate()`: i In argument: `force(across(everything(), error_fn))`. Caused by error in `across()`: ! Can't compute column `y`. Caused by error in `error_fn()`: ! too small Code (expect_error(tibble(x = 1) %>% summarise(across(everything(), list(f = mean, f = mean))))) Output Error in `summarise()`: i In argument: `across(everything(), list(f = mean, f = mean))`. Caused by error in `across()`: ! Names must be unique. x These names are duplicated: * "x_f" at locations 1 and 2. # if_any() and if_all() aborts when predicate mistakingly used in .cols= (#5732) Code (expect_error(filter(df, if_any(~ .x > 5)))) Output Error in `filter()`: i In argument: `if_any(~.x > 5)`. Caused by error in `if_any()`: ! Must supply a column selection. i You most likely meant: `if_any(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. 
Code (expect_error(filter(df, if_all(~ .x > 5)))) Output Error in `filter()`: i In argument: `if_all(~.x > 5)`. Caused by error in `if_all()`: ! Must supply a column selection. i You most likely meant: `if_all(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(filter(df, !if_any(~ .x > 5)))) Output Error in `filter()`: i In argument: `!if_any(~.x > 5)`. Caused by error in `if_any()`: ! Must supply a column selection. i You most likely meant: `if_any(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. Code (expect_error(filter(df, !if_all(~ .x > 5)))) Output Error in `filter()`: i In argument: `!if_all(~.x > 5)`. Caused by error in `if_all()`: ! Must supply a column selection. i You most likely meant: `if_all(everything(), ~.x > 5)`. i The first argument `.cols` selects a set of columns. i The second argument `.fns` operates on each selected columns. # inlined and non inlined lambdas work Code (expect_error(df %>% mutate(across(1:2, ~ .y + mean(bar))))) Output Error in `mutate()`: i In argument: `across(1:2, ~.y + mean(bar))`. Caused by error in `across()`: ! Can't compute column `foo`. Caused by error: ! the ... list contains fewer than 2 elements Code (expect_error(df %>% mutate((across(1:2, ~ .y + mean(bar)))))) Output Error in `mutate()`: i In argument: `(across(1:2, ~.y + mean(bar)))`. Caused by error in `across()`: ! Can't compute column `foo`. Caused by error in `fn()`: ! the ... list contains fewer than 2 elements # anonymous function `.fns` can access the `.data` pronoun even when not inlined Code mutate(df, across(y, fn)) Condition Error in `mutate()`: i In argument: `across(y, fn)`. Caused by error in `across()`: ! Can't compute column `y`. Caused by error: ! Can't subset `.data` outside of a data mask context. 
# can't rename during selection (#6522) Code mutate(df, z = c_across(c(y = x))) Condition Error in `mutate()`: i In argument: `z = c_across(c(y = x))`. Caused by error in `c_across()`: ! Can't rename variables in this context. # can't explicitly select grouping columns (#6522) Code mutate(gdf, y = c_across(g)) Condition Error in `mutate()`: i In argument: `y = c_across(g)`. i In group 1: `g = 1`. Caused by error in `c_across()`: ! Can't subset columns that don't exist. x Column `g` doesn't exist. # across() applies old `.cols = everything()` default with a warning Code out <- mutate(df, across(.fns = times_two)) Condition Warning: There was 1 warning in `mutate()`. i In argument: `across(.fns = times_two)`. Caused by warning: ! Using `across()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- mutate(gdf, across(.fns = times_two)) Condition Warning: There was 1 warning in `mutate()`. i In argument: `across(.fns = times_two)`. Caused by warning: ! Using `across()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- mutate(df, (across(.fns = times_two))) Condition Warning: There was 1 warning in `mutate()`. i In argument: `(across(.fns = times_two))`. Caused by warning: ! Using `across()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- mutate(gdf, (across(.fns = times_two))) Condition Warning: There were 2 warnings in `mutate()`. The first warning was: i In argument: `(across(.fns = times_two))`. i In group 1: `g = 1`. Caused by warning: ! Using `across()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. 
# if_any() and if_all() apply old `.cols = everything()` default with a warning Code out <- filter(df, if_any()) Condition Warning: Using `if_any()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- filter(gdf, if_any()) Condition Warning: Using `if_any()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- filter(df, if_all()) Condition Warning: Using `if_all()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- filter(gdf, if_all()) Condition Warning: Using `if_all()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- filter(df, (if_any())) Condition Warning: There was 1 warning in `filter()`. i In argument: `(if_any())`. Caused by warning: ! Using `if_any()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- filter(gdf, (if_any())) Condition Warning: There were 2 warnings in `filter()`. The first warning was: i In argument: `(if_any())`. i In group 1: `g = 1`. Caused by warning: ! Using `if_any()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. --- Code out <- filter(df, (if_all())) Condition Warning: There was 1 warning in `filter()`. i In argument: `(if_all())`. Caused by warning: ! Using `if_all()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. --- Code out <- filter(gdf, (if_all())) Condition Warning: There were 2 warnings in `filter()`. The first warning was: i In argument: `(if_all())`. i In group 1: `g = 1`. Caused by warning: ! Using `if_all()` without supplying `.cols` was deprecated in dplyr 1.1.0. i Please supply `.cols` instead. i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. 
# c_across() applies old `cols = everything()` default with a warning Code out <- mutate(df, z = sum(c_across())) Condition Warning: There were 2 warnings in `mutate()`. The first warning was: i In argument: `z = sum(c_across())`. i In row 1. Caused by warning: ! Using `c_across()` without supplying `cols` was deprecated in dplyr 1.1.0. i Please supply `cols` instead. i Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning. # across errors with non-empty dots and no `.fns` supplied (#6638) Code mutate(df, across(x, .funs = ~ . * 1000)) Condition Error in `mutate()`: i In argument: `across(x, .funs = ~. * 1000)`. Caused by error in `across()`: ! `...` must be empty. x Problematic argument: * .funs = ~. * 1000 # across(...) is deprecated Code summarise(df, across(everything(), mean, na.rm = TRUE)) Condition Warning: There was 1 warning in `summarise()`. i In argument: `across(everything(), mean, na.rm = TRUE)`. Caused by warning: ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0. Supply arguments directly to `.fns` through an anonymous function instead. # Previously across(a:b, mean, na.rm = TRUE) # Now across(a:b, \(x) mean(x, na.rm = TRUE)) Output # A tibble: 1 x 1 x 1 1 dplyr/tests/testthat/_snaps/copy-to.md0000644000176200001440000000054714416000514017606 0ustar liggesusers# `auto_copy()` throws an informative error on different sources (#6798) Code auto_copy(df, NULL) Condition Error in `auto_copy()`: ! `x` and `y` must share the same src. i `x` is a object. i `y` is `NULL`. i Set `copy = TRUE` if `y` can be copied to the same source as `x` (may be slow). dplyr/tests/testthat/_snaps/colwise.md0000644000176200001440000000123214416000512017647 0ustar liggesusers# colwise utils gives meaningful error messages Code (expect_error(tbl_at_vars(iris, raw(3)))) Output Error: ! `.vars` must be a character/numeric vector or a `vars()` object, not a raw vector. 
Code (expect_error(tbl_if_vars(iris, list(identity, force), environment()))) Output Error: ! `.predicate` must have length 1, not 2. Code .funs <- as_fun_list(list(identity, force), caller_env()) (expect_error(tbl_if_vars(iris, .funs, environment()))) Output Error: ! `.predicate` must have length 1, not 2. dplyr/tests/testthat/test-bind-rows.R0000644000176200001440000001564614366556340017442 0ustar liggesuserstest_that("bind_rows() handles simple inputs", { df1 <- tibble(x = 1:2, y = letters[1:2]) df2 <- tibble(x = 3:4, y = letters[3:4]) out <- bind_rows(df1, df2) expect_equal(out, tibble(x = 1:4, y = letters[1:4])) }) test_that("bind_rows() reorders columns to match first df", { df1 <- tibble(x = 1, y = 2) df2 <- tibble(y = 1, x = 2) expect_named(bind_rows(df1, df2), c("x", "y")) }) test_that("bind_rows() returns union of columns", { df1 <- tibble(x = 1) df2 <- tibble(y = 2) out <- bind_rows(df1, df2) expect_equal(out, tibble(x = c(1, NA), y = c(NA, 2))) }) test_that("bind_rows() handles zero column data frames (#2175)", { df1 <- tibble(.rows = 1) df2 <- tibble(x = 1) out <- bind_rows(df1, df2) expect_equal(out, tibble(x = c(NA, 1))) }) test_that("bind_rows() handles zero row data frames (#597)", { df1 <- tibble(x = numeric()) df2 <- tibble(y = 1) out <- bind_rows(df1, df2) expect_equal(out, tibble(x = NA_real_, y = 1)) }) test_that("bind_rows() ignores NULL (#2056)", { df <- tibble(a = 1) expect_equal(bind_rows(df, NULL), df) expect_equal(bind_rows(list(df, NULL)), df) }) test_that("bind_rows() creates a column of identifiers (#1337)", { df1 <- tibble(x = 1:2) df2 <- tibble(x = 3) # with out <- bind_rows(a = df1, b = df2, .id = "id") expect_equal(out, tibble(id = c("a", "a", "b"), x = 1:3)) out <- bind_rows(list(a = df1, b = df2), .id = "id") expect_equal(out, tibble(id = c("a", "a", "b"), x = 1:3)) # or without names out <- bind_rows(df1, df2, .id = "id") expect_equal(out, tibble(id = c("1", "1", "2"), x = 1:3)) }) test_that("bind_rows deduplicates row names", { 
df1 <- data.frame(x = 1:2, row.names = c("a", "b")) df2 <- data.frame(x = 3:4, row.names = c("a", "c")) out <- bind_rows(df1, df2) expect_equal(rownames(out), c("a...1", "b", "a...3", "c")) }) test_that("bind_rows respects the drop attribute of grouped df",{ df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) gg <- bind_rows(df, df) expect_equal(group_size(gg), c(4L,4L,0L)) }) # bind_rows() magic --------------------------------------------------- test_that("bind_rows() handles lists of data frames #1389", { df <- tibble(x = 1) res <- bind_rows(list(df, df), list(df, df)) expect_equal(nrow(res), 4) }) test_that("bind_rows() ignores empty lists (#2826)", { df <- tibble(x = 1:10) expect_equal(bind_rows(list(df, list())), df) }) test_that("bind_rows() accepts lists of dataframe-like lists as first argument", { ll <- list(a = 1, b = 2) expect_equal(bind_rows(list(ll)), tibble(a = 1, b = 2)) expect_equal(bind_rows(list(ll, ll)), tibble(a = c(1, 1), b = c(2, 2))) }) test_that("bind_rows() can handle lists (#1104)", { ll <- list(list(x = 1, y = "a"), list(x = 2, y = "b")) out <- bind_rows(ll) expect_equal(out, tibble(x = c(1, 2), y = c("a", "b"))) out <- bind_rows(ll[[1]], ll[2]) expect_equal(out, tibble(x = c(1, 2), y = c("a", "b"))) }) test_that("bind_rows() handles 0-length named list (#1515)", { x <- set_names(list()) expect_equal(bind_rows(x), tibble()) }) test_that("bind_rows() handles tibbles + vectors", { out <- bind_rows( tibble(a = 1, b = 2), c(a = 3, b = 4) ) expect_equal(out, tibble(a = c(1, 3), b = c(2, 4))) out <- bind_rows( a = c(a = 1, b = 2), b = c(a = 3, b = 4), .id = "id" ) expect_equal(out, tibble(id = c("a", "b"), a = c(1, 3), b = c(2, 4))) }) test_that("bind_rows() only flattens S3 lists that inherit from list (#3924)", { df <- data.frame(x = 1, y = 2) lst1 <- structure(list(df, df, df), class = "special_lst") expect_snapshot(bind_rows(lst1), error = TRUE) 
lst2 <- structure(list(df, df, df), class = c("special_lst", "list")) expect_equal(bind_rows(lst2), bind_rows(df,df,df)) }) test_that("bind_rows() handles named list", { x <- list(x = 1, y = 2, z = 3) expect_equal(bind_rows(x), tibble(x = 1, y = 2, z = 3)) }) test_that("bind_rows() validates lists (#5417)", { out <- bind_rows(list(x = 1), list(x = 1, y = 1:2)) expect_equal(out, tibble(x = c(1, 1, 1), y = c(NA, 1:2))) expect_snapshot(bind_rows(list(x = 1), list(x = 1:3, y = 1:2)), error = TRUE) }) test_that("bind_rows() handles missing, null, and empty elements (#5429)", { x <- list(a = NULL, b = NULL) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(a = "B", b = 2) ) x <- list(a = NULL, b = 1) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(b = c(1, 2), a = c(NA, "B")) ) x <- list(a = character(0), b = 1) y <- list(a = "B", b = 2) l <- list(x, y) expect_identical( bind_rows(l), tibble(a = "B", b = 2) ) }) test_that("bind_rows(.id= NULL) does not set names (#5089)", { out <- bind_rows(list(a = tibble(x = 1:2))) expect_equal(attr(out, "row.names"), 1:2) out <- bind_rows(x = c(a = 1)) expect_identical(attr(out, "row.names"), 1L) }) # Column coercion -------------------------------------------------------------- test_that("bind_rows() promotes integer to numeric", { df1 <- tibble(a = 1L, b = 1L) df2 <- tibble(a = 1, b = 1L) res <- bind_rows(df1, df2) expect_type(res$a, "double") expect_type(res$b, "integer") }) test_that("bind_rows() coerces factor when levels don't match", { df1 <- data.frame(a = factor("a")) df2 <- data.frame(a = factor("b")) res <- bind_rows(df1, df2) expect_equal(res$a, factor(c("a", "b"))) }) test_that("bind_rows() handles complex. 
#933", { df1 <- tibble(x = 1 + 1i) df2 <- tibble(x = 2 + 1i) out <- bind_rows(df1, df2) expect_equal(out, tibble(x = c(1 + 1i, 2 + 1i))) }) test_that("bind_rows() handles POSIXct (#1125)", { df1 <- data.frame(date = as.POSIXct(NA)) df2 <- data.frame(date = as.POSIXct("2015-05-05")) res <- bind_rows(df1, df2) expect_equal(nrow(res), 2L) expect_true(is.na(res$date[1])) }) test_that("bind_rows() accepts data frame columns (#2015)", { df1 <- tibble(x = 1, y = tibble(a = 1, b = 1)) df2 <- tibble(x = 2, y = tibble(a = 2, b = 2)) out <- bind_rows(df1, df2) expect_equal(out, tibble(x = 1:2, y = tibble(a = 1:2, b = 1:2))) }) test_that("bind_rows() accepts difftime objects", { df1 <- data.frame(x = as.difftime(1, units = "hours")) df2 <- data.frame(x = as.difftime(1, units = "mins")) res <- bind_rows(df1, df2) expect_equal(res$x, as.difftime(c(3600, 60), units = "secs")) }) # Errors ------------------------------------------------------------ test_that("bind_rows() give informative errors", { expect_snapshot({ "invalid .id" df1 <- tibble(x = 1:3) df2 <- tibble(x = 4:6) (expect_error(bind_rows(df1, df2, .id = 5))) "invalid type" ll <- list(tibble(a = 1:5), env(a = 1)) (expect_error(bind_rows(ll))) df1 <- tibble(a = factor("a")) df2 <- tibble(a = 1L) (expect_error(bind_rows(df1, df2))) "unnamed vectors" (expect_error(bind_rows(1:2))) }) }) dplyr/tests/testthat/test-by.R0000644000176200001440000000471514366556340016143 0ustar liggesuserstest_that("computes group data when `by` is set", { df <- tibble(x = c(1, 1, 2, 2, 1)) out <- compute_by(by = x, data = df) expect_identical(out$type, "grouped") expect_identical(out$names, "x") expect_identical( out$data, tibble(x = c(1, 2), ".rows" := list_of(c(1L, 2L, 5L), c(3L, 4L))) ) }) test_that("computes `by` group data in order of appearance", { df <- tibble( x = c(5, 4, 5, 5), y = c(2, 3, 1, 2) ) out <- compute_by(by = c(x, y), data = df) expect <- tibble( x = c(5, 4, 5), y = c(2, 3, 1), ".rows" := list_of(c(1L, 4L), 2L, 3L) ) 
expect_identical(out$data, expect)
})

# `by = NULL` means: reuse whatever grouping the input data frame already
# carries. `out$type` reports that structure and `out$data` mirrors
# `group_data()` for each flavour of input.
test_that("extracts existing data when `by = NULL`", {
  df <- data.frame(x = c(1, 1, 2, 2, 1))
  out <- compute_by(by = NULL, data = df)
  expect_identical(out$type, "ungrouped")
  expect_identical(out$names, character())
  # `compute_by()` is always type stable on `$data` and returns a bare tibble
  expect_identical(out$data, as_tibble(group_data(df)))

  df <- tibble(x = c(1, 1, 2, 2, 1))
  out <- compute_by(by = NULL, data = df)
  expect_identical(out$type, "ungrouped")
  expect_identical(out$names, character())
  expect_identical(out$data, group_data(df))

  # A grouped_df input reports "grouped" and exposes its grouping columns
  gdf <- group_by(df, x)
  out <- compute_by(by = NULL, data = gdf)
  expect_identical(out$type, "grouped")
  expect_identical(out$names, "x")
  expect_identical(out$data, group_data(gdf))

  # A rowwise_df input reports "rowwise" with no grouping column names
  rdf <- rowwise(df)
  out <- compute_by(by = NULL, data = rdf)
  expect_identical(out$type, "rowwise")
  expect_identical(out$names, character())
  expect_identical(out$data, group_data(rdf))
})

# Selecting zero columns with `by = c()` behaves like `by = NULL` on
# ungrouped data.
test_that("empty selection results in ungrouped group data", {
  df <- tibble(x = 1)
  out <- compute_by(by = c(), data = df)
  expect_identical(out$type, "ungrouped")
  expect_identical(out$names, character())
  expect_identical(out$data, group_data(df))
})

# Error paths below are locked in as snapshots so the message wording is
# part of the contract.
test_that("throws tidyselect errors", {
  df <- tibble(x = 1)
  expect_snapshot(error = TRUE, {
    compute_by(by = y, data = df)
  })
})

test_that("can't set `.by` with a grouped-df", {
  df <- tibble(x = 1:5)
  gdf <- group_by(df, x)
  expect_snapshot(error = TRUE, {
    compute_by(x, gdf)
  })
})

test_that("can't set `.by` with a rowwise-df", {
  df <- tibble(x = 1:5)
  rdf <- rowwise(df)
  expect_snapshot(error = TRUE, {
    compute_by(x, rdf)
  })
})

# `by_arg`/`data_arg` customise how the arguments are named in the error
# message; the snapshot records the customised wording.
test_that("can tweak the error args", {
  df <- tibble(x = 1:5)
  gdf <- group_by(df, x)
  expect_snapshot(error = TRUE, {
    compute_by(x, gdf, by_arg = "x", data_arg = "dat")
  })
})
dplyr/tests/testthat/test-consecutive-id.R0000644000176200001440000000112714366556340020444 0ustar liggesuserstest_that("works with simple vectors", {
  # A new id starts whenever the value changes from the previous element
  expect_equal(consecutive_id(c(1, 1, 2, 1, 2)), c(1,
1, 2, 3, 4))
})

# Data frame input: a run breaks when any column changes between rows
test_that("handles data frames", {
  df <- tibble(x = c(1, 1, 1, 1), y = c(1, 2, 2, 1))
  expect_equal(consecutive_id(df), c(1, 2, 2, 3))
})

# Inputs are recycled against each other like vctrs vectors; incompatible
# sizes (3 vs 4) must error, captured as a snapshot.
test_that("follows recycling rules", {
  expect_equal(consecutive_id(double(), 1), integer())
  expect_equal(consecutive_id(1:2, 1), 1:2)
  expect_snapshot(error = TRUE, {
    consecutive_id(1:3, 1:4)
  })
})

# Named arguments and non-vector input (a function) are rejected with
# informative messages.
test_that("generates useful errors", {
  expect_snapshot(error = TRUE, {
    consecutive_id(x = 1:4)
    consecutive_id(mean)
  })
})
dplyr/tests/testthat/test-deprec-context.R0000644000176200001440000000411614406402754020442 0ustar liggesuserstest_that("cur_data() is soft deprecated", {
  # `lifecycle_verbosity = "warning"` forces the deprecation warning to fire
  # so the snapshot can record it
  options(lifecycle_verbosity = "warning")
  df <- tibble(x = 1)
  expect_snapshot(mutate(df, y = cur_data()))
})

test_that("cur_data_all() is soft deprecated", {
  options(lifecycle_verbosity = "warning")
  df <- tibble(x = 1)
  expect_snapshot(mutate(df, y = cur_data_all()))
})

# The remaining tests silence the deprecation warning ("quiet") and verify
# the legacy behaviour still works.
test_that("cur_data() gives current data without groups, cur_data_all() includes groups", {
  options(lifecycle_verbosity = "quiet")
  df <- tibble(x = c("b", "a", "b"), y = 1:3)
  gf <- group_by(df, x)

  expect_equal(
    df %>% summarise(x = list(cur_data())) %>% pull(),
    list(df)
  )
  # cur_data() excludes the grouping column x ...
  expect_equal(
    gf %>% summarise(x = list(cur_data())) %>% pull(),
    list(tibble(y = 2L), tibble(y = c(1L, 3L)))
  )
  # ... while cur_data_all() includes it
  expect_equal(
    gf %>% summarise(x = list(cur_data_all())) %>% pull(),
    list(tibble(x = "a", y = 2L), tibble(x = "b", y = c(1L, 3L)))
  )
})

test_that("cur_data()/cur_data_all() keeps list columns as lists in rowwise_df (#5901)", {
  options(lifecycle_verbosity = "quiet")
  df <- tibble(x = list(tibble(a = 1), tibble(a = 2))) %>% rowwise()
  expect_true(
    all(summarise(df, test = obj_is_list(cur_data()$x))$test)
  )
  expect_true(
    all(summarise(df, test = obj_is_list(cur_data_all()$x))$test)
  )
})

# Each call sees the columns created by earlier expressions in the same
# mutate(), so the second ncol() is one larger than the first.
test_that("cur_data() and cur_data_all() work sequentially", {
  options(lifecycle_verbosity = "quiet")
  df <- tibble(a = 1)
  expect_equal(
    mutate(df, x = ncol(cur_data()), y = ncol(cur_data())),
    tibble(a = 1, x = 1, y = 2)
  )

  gf <-
tibble(a = 1, b = 2) %>% group_by(a) expect_equal( mutate(gf, x = ncol(cur_data_all()), y = ncol(cur_data_all())), group_by(tibble(a = 1, b = 2, x = 2, y = 3), a) ) }) test_that("mutate(=NULL) preserves correct all_vars", { options(lifecycle_verbosity = "quiet") df <- data.frame(x = 1, y = 2) %>% mutate(x = NULL, vars = cur_data_all()) %>% pull() expect_equal(df, tibble(y = 2)) }) test_that("give useful error messages when not applicable", { options(lifecycle_verbosity = "quiet") expect_snapshot({ (expect_error(cur_data())) (expect_error(cur_data_all())) }) }) dplyr/tests/testthat/test-select.R0000644000176200001440000001354714366556340017013 0ustar liggesuserstest_that("select preserves grouping", { gf <- group_by(tibble(g = 1:3, x = 3:1), g) i <- count_regroups(out <- select(gf, h = g)) expect_equal(i, 0) expect_equal(group_vars(out), "h") }) test_that("grouping variables preserved with a message, unless already selected (#1511, #5841)", { df <- tibble(g = 1:3, x = 3:1) %>% group_by(g) expect_snapshot({ res <- select(df, x) }) expect_named(res, c("g", "x")) df <- tibble(a = 1, b = 2, c = 3) %>% group_by(a) expect_equal(df %>% select(a = b), tibble(a = 2)) df <- tibble(a = 1, b = 2, c = 3) %>% group_by(a, b) expect_snapshot({ expect_equal(df %>% select(a = c), tibble(b = 2, a = 3) %>% group_by(b)) expect_equal(df %>% select(b = c), tibble(a = 1, b = 3) %>% group_by(a)) }) }) test_that("non-syntactic grouping variable is preserved (#1138)", { expect_snapshot( df <- tibble(`a b` = 1L) %>% group_by(`a b`) %>% select() ) expect_named(df, "a b") }) test_that("select doesn't fail if some names missing", { df1 <- data.frame(x = 1:10, y = 1:10, z = 1:10) df2 <- setNames(df1, c("x", "y", "")) # df3 <- setNames(df1, c("x", "", "")) expect_equal(select(df1, x), data.frame(x = 1:10)) expect_equal(select(df2, x), data.frame(x = 1:10)) # expect_equal(select(df3, x), data.frame(x = 1:10)) }) # Special cases ------------------------------------------------- test_that("select with 
no args returns nothing", { empty <- select(mtcars) expect_equal(ncol(empty), 0) expect_equal(nrow(empty), 32) empty <- select(mtcars, !!!list()) expect_equal(ncol(empty), 0) expect_equal(nrow(empty), 32) }) test_that("select excluding all vars returns nothing", { expect_equal(dim(select(mtcars, -(mpg:carb))), c(32, 0)) expect_equal(dim(select(mtcars, starts_with("x"))), c(32, 0)) expect_equal(dim(select(mtcars, -matches("."))), c(32, 0)) }) test_that("negating empty match returns everything", { df <- data.frame(x = 1:3, y = 3:1) expect_equal(select(df, -starts_with("xyz")), df) }) test_that("can select with duplicate columns", { df <- tibble(x = 1, x = 2, y = 1, .name_repair = "minimal") # can extract duplicate cols by position expect_named(df %>% select(1, 3), c("x", "y")) # can select out non-duplicated columns expect_named(df %>% select(y), "y") }) # Select variables ----------------------------------------------- test_that("select can be before group_by (#309)", { df <- data.frame( id = c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5), year = c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013), var1 = rnorm(10) ) dfagg <- df %>% group_by(id, year) %>% select(id, year, var1) %>% summarise(var1 = mean(var1)) expect_equal(names(dfagg), c("id", "year", "var1")) }) test_that("select succeeds in presence of raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(select(df, a), df["a"]) expect_identical(select(df, b), df["b"]) expect_identical(select(df, -b), df["a"]) }) test_that("arguments to select() don't match vars_select() arguments", { df <- tibble(a = 1) expect_identical(select(df, var = a), tibble(var = 1)) expect_identical(select(group_by(df, a), var = a), group_by(tibble(var = 1), var)) expect_identical(select(df, exclude = a), tibble(exclude = 1)) expect_identical(select(df, include = a), tibble(include = 1)) expect_identical(select(group_by(df, a), exclude = a), group_by(tibble(exclude = 1), exclude)) expect_identical(select(group_by(df, 
a), include = a), group_by(tibble(include = 1), include)) }) test_that("can select() with deprecated `.data` pronoun (#2715)", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(select(mtcars, .data$cyl), select(mtcars, cyl)) }) test_that("can select() with character vectors", { expect_identical(select(mtcars, "cyl", !!"disp", c("cyl", "am", "drat")), mtcars[c("cyl", "disp", "am", "drat")]) }) test_that("select() treats NULL inputs as empty", { expect_identical(select(mtcars, cyl), select(mtcars, NULL, cyl, NULL)) }) test_that("can select() with strings and character vectors", { vars <- c(foo = "cyl", bar = "am") expect_identical(select(mtcars, !!!vars), select(mtcars, foo = cyl, bar = am)) expect_identical(select(mtcars, !!vars), select(mtcars, foo = cyl, bar = am)) }) test_that("select works on empty names (#3601)", { df <- data.frame(x=1, y=2, z=3) colnames(df) <- c("x","y","") expect_identical(select(df, x)$x, 1) colnames(df) <- c("","y","z") expect_identical(select(df, y)$y, 2) }) test_that("select works on NA names (#3601)", { df <- data.frame(x=1, y=2, z=3) colnames(df) <- c("x","y",NA) expect_identical(select(df, x)$x, 1) colnames(df) <- c(NA,"y","z") expect_identical(select(df, y)$y, 2) }) test_that("select() keeps attributes of raw data frames (#5831)", { df <- data.frame(x = 1) attr(df, "a") <- "b" expect_equal(attr(select(df, x), "a"), "b") }) test_that("select() provides informative errors", { expect_snapshot({ (expect_error(select(mtcars, 1 + ""))) }) }) # dplyr_col_select() ------------------------------------------------------ test_that("dplyr_col_select() aborts when `[` implementation is broken", { local_methods( "[.dplyr_test_broken_operator" = function(x, ...) { unclass(x) }, "[.dplyr_test_operator_wrong_size" = function(x, ...) 
{ data.frame() }
  )

  df1 <- new_tibble(list(x = 1), nrow = 1L, class = "dplyr_test_broken_operator")
  expect_snapshot({
    (expect_error(
      select(df1, 1:2)
    ))
    (expect_error(
      select(df1, 0)
    ))
  })

  df2 <- new_tibble(list(x = 1), nrow = 1L, class = "dplyr_test_operator_wrong_size")
  expect_error(select(df2, 1:2))

  expect_snapshot({
    # from vctrs
    (expect_error(
      select(df1, 2)
    ))

    # not returning a data frame
    (expect_error(
      select(df1, 1)
    ))

    # unexpected number of columns
    (expect_error(
      select(df2, 1)
    ))
  })
})
dplyr/tests/testthat/test-group-trim.R0000644000176200001440000000117014366556340017626 0ustar liggesuserstest_that("group_trim() is identity on non grouped data", {
  expect_identical(group_trim(iris), iris)
})

test_that("group_trim() always regroups even if no factors", {
  # .preserve = TRUE keeps empty groups after filter(); group_trim() must
  # drop them, leaving a single group
  res <- mtcars %>%
    group_by(cyl) %>%
    filter(cyl == 6, .preserve = TRUE) %>%
    group_trim()
  expect_equal(n_groups(res), 1L)
})

test_that("group_trim() drops factor levels in data and grouping structure", {
  res <- iris %>%
    group_by(Species) %>%
    filter(Species == "setosa") %>%
    group_trim()
  expect_equal(n_groups(res), 1L)
  # Unused factor levels are dropped both in the column itself and in the
  # "groups" attribute
  expect_equal(levels(res$Species), "setosa")
  expect_equal(levels(attr(res, "groups")$Species), "setosa")
})
dplyr/tests/testthat/test-groups-with.R0000644000176200001440000000116213663216626020011 0ustar liggesuserstest_that("restores original class", {
  # with_groups() groups temporarily and then restores the input class
  df <- data.frame(x = 1:2)
  gf <- group_by(df, x)
  expect_s3_class(with_groups(df, x, mutate), "data.frame", exact = TRUE)
  expect_s3_class(with_groups(gf, x, mutate), "grouped_df")
})

test_that(".groups = NULL ungroups", {
  gf <- group_by(tibble(x = 1:2), x)
  out <- gf %>% with_groups(NULL, mutate, y = mean(x))
  # mean is computed over the whole (ungrouped) column
  expect_equal(out$y, c(1.5, 1.5))
})

test_that(".groups is defused with context", {
  # `.groups` is evaluated in the caller's environment, so wrapping the
  # column position in a local function is equivalent to passing it directly
  local_fn <- identity
  expect_identical(
    with_groups(mtcars, local_fn(2), mutate, disp = disp / sd(disp)),
    with_groups(mtcars, 2, mutate, disp = disp / sd(disp))
  )
})
dplyr/tests/testthat/test-join-cross.R0000644000176200001440000000243014366556340017607 0ustar
liggesuserstest_that("cross join works", { df1 <- tibble(x = 1:2) df2 <- tibble(y = 1:3) expect_identical( cross_join(df1, df2), tibble( x = vec_rep_each(1:2, times = 3), y = vec_rep(1:3, times = 2) ) ) }) test_that("cross join results in 0 rows if either input has 0 rows", { df1 <- tibble(x = 1:2) df2 <- tibble(y = integer()) expect_identical( cross_join(df1, df2), tibble(x = integer(), y = integer()) ) expect_identical( cross_join(df2, df1), tibble(y = integer(), x = integer()) ) }) test_that("cross join works with 0 column, >0 row tibbles", { df1 <- new_tibble(list(), nrow = 3) df2 <- tibble(x = 1:2) expect_identical( cross_join(df1, df1), new_tibble(list(), nrow = 9) ) expect_identical( cross_join(df1, df2), vec_rep(df2, times = 3) ) }) test_that("cross join applies `suffix`", { df1 <- tibble(x = 1, y = 2) df2 <- tibble(x = 2, z = 3) expect_named(cross_join(df1, df2), c("x.x", "y", "x.y", "z")) expect_named(cross_join(df1, df2, suffix = c("", "_y")), c("x", "y", "x_y", "z")) }) test_that("cross join checks for duplicate names", { df1 <- tibble(a = 1, b = 2, a = 3, .name_repair = "minimal") df2 <- tibble(a = 2, c = 3) expect_snapshot(error = TRUE, { cross_join(df1, df2) }) }) dplyr/tests/testthat/test-reframe.R0000644000176200001440000001711114366556340017144 0ustar liggesuserstest_that("`reframe()` allows summaries", { df <- tibble(g = c(1, 1, 1, 2, 2), x = 1:5) expect_identical( reframe(df, x = mean(x)), tibble(x = 3) ) expect_identical( reframe(df, x = mean(x), .by = g), tibble(g = c(1, 2), x = c(2, 4.5)) ) }) test_that("`reframe()` allows size 0 results", { df <- tibble(g = c(1, 1, 1, 2, 2), x = 1:5) gdf <- group_by(df, g) expect_identical( reframe(df, x = which(x > 5)), tibble(x = integer()) ) expect_identical( reframe(df, x = which(x > 5), .by = g), tibble(g = double(), x = integer()) ) expect_identical( reframe(gdf, x = which(x > 5)), tibble(g = double(), x = integer()) ) }) test_that("`reframe()` allows size >1 results", { df <- tibble(g = c(1, 1, 1, 2, 
2), x = 1:5) gdf <- group_by(df, g) expect_identical( reframe(df, x = which(x > 2)), tibble(x = 3:5) ) expect_identical( reframe(df, x = which(x > 2), .by = g), tibble(g = c(1, 2, 2), x = c(3L, 1L, 2L)) ) expect_identical( reframe(gdf, x = which(x > 2)), tibble(g = c(1, 2, 2), x = c(3L, 1L, 2L)) ) }) test_that("`reframe()` recycles across columns", { df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) out <- reframe(df, a = 1:2, b = 1L, c = 2:3) expect_identical(out$a, 1:2) expect_identical(out$b, c(1L, 1L)) expect_identical(out$c, 2:3) out <- reframe(df, a = 1:2, b = 1L, c = 2:3, .by = g) expect_identical(out$g, c(1, 1, 2, 2)) expect_identical(out$a, c(1:2, 1:2)) expect_identical(out$b, c(1L, 1L, 1L, 1L)) expect_identical(out$c, c(2:3, 2:3)) }) test_that("`reframe()` can recycle across columns to size 0", { df <- tibble(g = 1:2, x = 1:2) gdf <- group_by(df, g) out <- reframe(df, y = mean(x), z = which(x > 3)) expect_identical(out$y, double()) expect_identical(out$z, integer()) out <- reframe(df, y = mean(x), z = which(x > 1), .by = g) expect_identical(out$g, 2L) expect_identical(out$y, 2) expect_identical(out$z, 1L) out <- reframe(gdf, y = mean(x), z = which(x > 1)) expect_identical(out$g, 2L) expect_identical(out$y, 2) expect_identical(out$z, 1L) }) test_that("`reframe()` throws intelligent recycling errors", { df <- tibble(g = 1:2, x = 1:2) gdf <- group_by(df, g) expect_snapshot(error = TRUE, { reframe(df, x = 1:2, y = 3:5) }) expect_snapshot(error = TRUE, { reframe(df, x = 1:2, y = 3:5, .by = g) }) expect_snapshot(error = TRUE, { reframe(gdf, x = 1:2, y = 3:5) }) }) test_that("`reframe()` can return more rows than the original data frame", { df <- tibble(x = 1:2) expect_identical( reframe(df, x = vec_rep_each(x, x)), tibble(x = c(1L, 2L, 2L)) ) }) test_that("`reframe()` doesn't message about regrouping when multiple group columns are supplied", { df <- tibble(a = c(1, 1, 2, 2, 2), b = c(1, 2, 1, 1, 2), x = 1:5) gdf <- group_by(df, a, b) # Silence expect_snapshot({ out 
<- reframe(df, x = mean(x), .by = c(a, b)) }) expect_snapshot({ out <- reframe(gdf, x = mean(x)) }) }) test_that("`reframe()` doesn't message about regrouping when >1 rows are returned per group", { df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) gdf <- group_by(df, g) # Silence expect_snapshot({ out <- reframe(df, x = vec_rep_each(x, x), .by = g) }) expect_snapshot({ out <- reframe(gdf, x = vec_rep_each(x, x)) }) }) test_that("`reframe()` allows sequential assignments", { df <- tibble(g = 1:2, x = 1:2) expect_identical( reframe(df, y = 3, z = mean(x) + y), tibble(y = 3, z = 4.5) ) expect_identical( reframe(df, y = 3, z = mean(x) + y, .by = g), tibble(g = 1:2, y = c(3, 3), z = c(4, 5)) ) }) test_that("`reframe()` allows for overwriting existing columns", { df <- tibble(g = c("a", "b"), x = 1:2) expect_identical( reframe(df, x = 3, z = x), tibble(x = 3, z = 3) ) expect_identical( reframe(df, x = cur_group_id(), z = x, .by = g), tibble(g = c("a", "b"), x = 1:2, z = 1:2) ) }) test_that("`reframe()` works with unquoted values", { df <- tibble(x = 1:5) expect_equal(reframe(df, out = !!1), tibble(out = 1)) expect_equal(reframe(df, out = !!quo(1)), tibble(out = 1)) expect_equal(reframe(df, out = !!(1:2)), tibble(out = 1:2)) }) test_that("`reframe()` with bare data frames always returns a bare data frame", { df <- data.frame(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3)) out <- reframe(df, x = mean(x)) expect_s3_class(out, class(df), exact = TRUE) out <- reframe(df, x = mean(x), .by = g) expect_s3_class(out, class(df), exact = TRUE) }) test_that("`reframe()` drops data frame attributes", { # Because `reframe()` theoretically creates a "new" data frame # With data.frames df <- data.frame(g = c(1, 1, 2), x = c(1, 2, 1)) attr(df, "foo") <- "bar" out <- reframe(df, x = mean(x)) expect_null(attr(out, "foo")) out <- reframe(df, x = mean(x), .by = g) expect_null(attr(out, "foo")) # With tibbles tbl <- as_tibble(df) attr(tbl, "foo") <- "bar" out <- reframe(tbl, x = mean(x)) 
expect_null(attr(out, "foo")) out <- reframe(tbl, x = mean(x), .by = g) expect_null(attr(out, "foo")) # With grouped_df gdf <- group_by(df, g) attr(gdf, "foo") <- "bar" out <- reframe(gdf, x = mean(x)) expect_null(attr(out, "foo")) }) test_that("`reframe()` with `group_by()` sorts keys", { df <- tibble(g = c(2, 1, 2, 0), x = c(4, 2, 8, 5)) df <- group_by(df, g) out <- reframe(df, x = mean(x)) expect_identical(out$g, c(0, 1, 2)) expect_identical(out$x, c(5, 2, 6)) }) test_that("`reframe()` with `group_by()` respects `.drop = FALSE`", { g <- factor(c("c", "a", "c"), levels = c("a", "b", "c")) df <- tibble(g = g, x = c(1, 4, 2)) gdf <- group_by(df, g, .drop = FALSE) out <- reframe(gdf, x = mean(x)) expect_identical(out$g, factor(c("a", "b", "c"))) expect_identical(out$x, c(4, NaN, 1.5)) }) test_that("`reframe()` with `group_by()` always returns an ungrouped tibble", { df <- tibble(a = c(1, 1, 2, 2, 2), b = c(1, 2, 1, 1, 2), x = 1:5) gdf <- group_by(df, a, b) out <- reframe(gdf, x = mean(x)) expect_identical(class(out), class(df)) }) test_that("`reframe()` with `rowwise()` respects list-col element access", { df <- tibble(x = list(1:2, 3:5, 6L)) rdf <- rowwise(df) expect_identical( reframe(rdf, x), tibble(x = 1:6) ) }) test_that("`reframe()` with `rowwise()` respects rowwise group columns", { df <- tibble(g = c(1, 1, 2), x = list(1:2, 3:5, 6L)) rdf <- rowwise(df, g) out <- reframe(rdf, x) expect_identical(out$g, c(rep(1, 5), 2)) expect_identical(out$x, 1:6) }) test_that("`reframe()` with `rowwise()` always returns an ungrouped tibble", { df <- tibble(g = c(1, 1, 2), x = list(1:2, 3:5, 6L)) rdf <- rowwise(df, g) expect_s3_class(reframe(rdf, x), class(df), exact = TRUE) }) # .by ---------------------------------------------------------------------- test_that("can group transiently using `.by`", { df <- tibble(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3)) out <- reframe(df, x = mean(x), .by = g) expect_identical(out$g, c(1, 2)) expect_identical(out$x, c(3, 2)) 
expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping orders by first appearance", { df <- tibble(g = c(2, 1, 2, 0), x = c(4, 2, 8, 5)) out <- reframe(df, x = mean(x), .by = g) expect_identical(out$g, c(2, 1, 0)) expect_identical(out$x, c(6, 2, 5)) }) test_that("catches `.by` with grouped-df", { df <- tibble(x = 1) gdf <- group_by(df, x) expect_snapshot(error = TRUE, { reframe(gdf, .by = x) }) }) test_that("catches `.by` with rowwise-df", { df <- tibble(x = 1) rdf <- rowwise(df) expect_snapshot(error = TRUE, { reframe(rdf, .by = x) }) }) dplyr/tests/testthat/test-distinct.R0000644000176200001440000001173014366556340017345 0ustar liggesuserstest_that("distinct equivalent to local unique when keep_all is TRUE", { df <- data.frame( x = c(1, 1, 1, 1), y = c(1, 1, 2, 2), z = c(1, 2, 1, 2) ) expect_equal(distinct(df), unique(df)) }) test_that("distinct for single column works as expected (#1937)", { df <- tibble( x = c(1, 1, 1, 1), y = c(1, 1, 2, 2), z = c(1, 2, 1, 2) ) expect_equal(distinct(df, x, .keep_all = FALSE), unique(df["x"])) expect_equal(distinct(df, y, .keep_all = FALSE), unique(df["y"])) }) test_that("distinct works for 0-sized columns (#1437)", { df <- tibble(x = 1:10) %>% select(-x) ddf <- distinct(df) expect_equal(ncol(ddf), 0L) }) test_that("if no variables specified, uses all", { df <- tibble(x = c(1, 1), y = c(2, 2)) expect_equal(distinct(df), tibble(x = 1, y = 2)) }) test_that("distinct keeps only specified cols", { df <- tibble(x = c(1, 1, 1), y = c(1, 1, 1)) expect_equal(df %>% distinct(x), tibble(x = 1)) }) test_that("unless .keep_all = TRUE", { df <- tibble(x = c(1, 1, 1), y = 3:1) expect_equal(df %>% distinct(x), tibble(x = 1)) expect_equal(df %>% distinct(x, .keep_all = TRUE), tibble(x = 1, y = 3L)) }) test_that("distinct doesn't duplicate columns", { df <- tibble(a = 1:3, b = 4:6) expect_named(df %>% distinct(a, a), "a") expect_named(df %>% group_by(a) %>% distinct(a), "a") }) test_that("grouped distinct always includes 
group cols", { df <- tibble(g = c(1, 2), x = c(1, 2)) out <- df %>% group_by(g) %>% distinct(x) expect_named(out, c("g", "x")) }) test_that("empty grouped distinct equivalent to empty ungrouped", { df <- tibble(g = c(1, 2), x = c(1, 2)) df1 <- df %>% distinct() %>% group_by(g) df2 <- df %>% group_by(g) %>% distinct() expect_equal(df1, df2) }) test_that("distinct on a new, mutated variable is equivalent to mutate followed by distinct", { df <- tibble(g = c(1, 2), x = c(1, 2)) df1 <- df %>% distinct(aa = g * 2) df2 <- df %>% mutate(aa = g * 2) %>% distinct(aa) expect_equal(df1, df2) }) test_that("distinct on a new, copied variable is equivalent to mutate followed by distinct (#3234)", { df <- tibble(g = c(1, 2), x = c(1, 2)) df1 <- df %>% distinct(aa = g) df2 <- df %>% mutate(aa = g) %>% distinct(aa) expect_equal(df1, df2) }) test_that("distinct on a dataframe or tibble with columns of type list throws an error", { df <- tibble( a = c("1", "1", "2", "2", "3", "3"), b = list("A") ) df2 <- data.frame(x = 1:5, y = I(list(1:3, 2:4, 3:5, 4:6, 5:7))) expect_identical(df2 %>% distinct(), df2) expect_identical(df %>% distinct(), df %>% slice(c(1, 3, 5))) }) test_that("distinct handles 0 columns edge case (#2954)", { d <- select(data.frame(x= c(1, 1)), one_of(character(0))) res <- distinct(d) expect_equal(nrow(res), 1L) expect_equal(nrow(distinct(tibble())), 0L) }) test_that("distinct respects order of the specified variables (#3195, #6156)",{ d <- data.frame(x = 1:2, y = 3:4) expect_named(distinct(d, y, x), c("y", "x")) }) test_that("distinct adds grouping variables to front if missing",{ d <- data.frame(x = 1:2, y = 3:4) expect_named(distinct(group_by(d, y), x), c("y", "x")) expect_named(distinct(group_by(d, y), x, y), c("x", "y")) }) test_that("distinct() understands both NA variants (#4516)", { df <- data.frame(col_a = c(1, NA, NA)) df$col_a <- df$col_a+0 df$col_a[2] <- NA_real_ expect_equal(nrow(distinct(df)), 2L) df_1 <- data.frame(col_a = c(1, NA)) df_2 <- 
data.frame(col_a = c(1, NA)) df_1$col_a <- df_1$col_a+0 df_2$col_a <- df_2$col_a+0 df_1$col_a[2] <- NA expect_equal(nrow(setdiff(df_1, df_2)), 0L) }) test_that("distinct() handles auto splicing", { expect_equal( iris %>% distinct(Species), iris %>% distinct(data.frame(Species=Species)) ) expect_equal( iris %>% distinct(Species), iris %>% distinct(pick(Species)) ) expect_equal( iris %>% mutate(across(starts_with("Sepal"), round)) %>% distinct(Sepal.Length, Sepal.Width), iris %>% distinct(across(starts_with("Sepal"), round)) ) }) test_that("distinct preserves grouping", { gf <- group_by(tibble(x = c(1, 1, 2, 2), y = x), x) i <- count_regroups(out <- distinct(gf)) expect_equal(i, 0) expect_equal(group_vars(out), "x") i <- count_regroups(out <- distinct(gf, x = x + 2)) expect_equal(i, 1) expect_equal(group_vars(out), "x") }) test_that("distinct() preserves attributes on bare data frames (#6318)", { df <- vctrs::data_frame(x = c(1, 1)) attr(df, "foo") <- "bar" out <- distinct(df, x) expect_identical(attr(out, "foo"), "bar") out <- distinct(df, y = x + 1L) expect_identical(attr(out, "foo"), "bar") }) # Errors ------------------------------------------------------------------ test_that("distinct errors when selecting an unknown column (#3140)", { expect_snapshot({ df <- tibble(g = c(1, 2), x = c(1, 2)) (expect_error(df %>% distinct(aa, x))) (expect_error(df %>% distinct(aa, bb))) (expect_error(df %>% distinct(.data$aa))) (expect_error(df %>% distinct(y = a + 1))) }) }) dplyr/tests/testthat/test-utils.R0000644000176200001440000000444614406402754016664 0ustar liggesusers# ------------------------------------------------------------------------------ # quo_is_variable_reference() test_that("quo_is_variable_reference handles .data",{ expect_true(quo_is_variable_reference(quo(x))) expect_true(quo_is_variable_reference(quo(.data$x))) expect_true(quo_is_variable_reference(quo(.data[["x"]]))) quo <- new_quosure(quote(.data[[identity("x")]])) 
expect_false(quo_is_variable_reference(quo)) }) # ------------------------------------------------------------------------------ # list_flatten() test_that("`list_flatten()` is a no-op on flattened lists", { x <- list(1, 2) expect_identical(list_flatten(x), x) }) test_that("`list_flatten()` flattens list elements", { x <- list(list(1, 2), 3, list(4)) expect_identical(list_flatten(x), list(1, 2, 3, 4)) }) test_that("`list_flatten()` doesn't try to be generic", { my_list <- function(...) structure(list(...), class = c("my_list", "list")) x <- my_list(list(1, 2), 3, my_list(4)) expect_identical(list_flatten(x), list(1, 2, 3, 4)) # The no-op case returns a bare list too x <- my_list(1, 2) expect_identical(list_flatten(x), list(1, 2)) }) test_that("`list_flatten()` only retains inner names of flattened elements", { x <- list(a = list(1, b = 2), 3, list(d = 4), e = 5, f = list(1)) expect_identical(list_flatten(x), list(1, b = 2, 3, d = 4, e = 5, 1)) }) test_that("`list_flatten()` can work recursively", { x <- list(list(list(1, 2), 3), 4) # Not by default expect_identical(list_flatten(x), list(list(1, 2), 3, 4)) expect_identical(list_flatten(x, recursive = TRUE), list(1, 2, 3, 4)) }) test_that("recursive `list_flatten()` handles names correctly", { x <- list(a = list(b = list(1), c = list(d = 2), 3, e = 4), f = 5) expect_identical( list_flatten(x, recursive = TRUE), list(1, d = 2, 3, e = 4, f = 5) ) }) test_that("`list_flatten()` accepts a predicate `fn` to selectively flatten", { is_flattenable <- function(x) !is_named(x) x <- list(a = list(list(1), list(b = 2), 3), c = 4, d = list(e = 5), f = list(6)) expect_identical( list_flatten(x, fn = is_flattenable), list(list(1), list(b = 2), 3, c = 4, d = list(e = 5), 6) ) expect_identical( list_flatten(x, fn = is_flattenable, recursive = TRUE), list(1, list(b = 2), 3, c = 4, d = list(e = 5), 6) ) }) dplyr/tests/testthat/test-rename.R0000644000176200001440000000511114366556340016767 0ustar liggesuserstest_that("rename() handles 
deprecated `.data` pronoun", { withr::local_options(lifecycle_verbosity = "quiet") expect_identical(rename(tibble(x = 1), y = .data$x), tibble(y = 1)) }) test_that("arguments to rename() don't match vars_rename() arguments (#2861)", { df <- tibble(a = 1) expect_identical(rename(df, var = a), tibble(var = 1)) expect_identical(rename(group_by(df, a), var = a), group_by(tibble(var = 1), var)) expect_identical(rename(df, strict = a), tibble(strict = 1)) expect_identical(rename(group_by(df, a), strict = a), group_by(tibble(strict = 1), strict)) }) test_that("rename() to UTF-8 column names", { skip_if_not(l10n_info()$"UTF-8") df <- tibble(a = 1) %>% rename("\u5e78" := a) expect_equal(colnames(df), "\u5e78") }) test_that("can rename() with strings and character vectors", { vars <- c(foo = "cyl", bar = "am") expect_identical(rename(mtcars, !!!vars), rename(mtcars, foo = cyl, bar = am)) expect_identical(rename(mtcars, !!vars), rename(mtcars, foo = cyl, bar = am)) }) test_that("rename preserves grouping", { gf <- group_by(tibble(g = 1:3, x = 3:1), g) i <- count_regroups(out <- rename(gf, h = g)) expect_equal(i, 0) expect_equal(group_vars(out), "h") }) test_that("can rename with duplicate columns", { df <- tibble(x = 1, x = 2, y = 1, .name_repair = "minimal") expect_named(df %>% rename(x2 = 2), c("x", "x2", "y")) }) test_that("rename() ignores duplicates", { df <- tibble(x = 1) expect_named(rename(df, a = x, b = x), "b") }) # rename_with ------------------------------------------------------------- test_that("can select columns", { df <- tibble(x = 1, y = 2) expect_named(df %>% rename_with(toupper, 1), c("X", "y")) df <- tibble(x = 1, y = 2) expect_named(df %>% rename_with(toupper, x), c("X", "y")) }) test_that("passes ... 
along", { df <- tibble(x = 1, y = 2) expect_named(df %>% rename_with(gsub, 1, pattern = "x", replacement = "X"), c("X", "y")) }) test_that("can't create duplicated names", { df <- tibble(x = 1, y = 2) expect_error(df %>% rename_with(~ rep_along(.x, "X")), class = "vctrs_error_names") }) test_that("`.fn` result type is checked (#6561)", { df <- tibble(x = 1) fn <- function(x) 1L expect_snapshot(error = TRUE, { rename_with(df, fn) }) }) test_that("`.fn` result size is checked (#6561)", { df <- tibble(x = 1, y = 2) fn <- function(x) c("a", "b", "c") expect_snapshot(error = TRUE, { rename_with(df, fn) }) }) test_that("can't rename in `.cols`", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { rename_with(df, toupper, .cols = c(y = x)) }) }) dplyr/tests/testthat/test-transmute.R0000644000176200001440000000610014277511752017540 0ustar liggesuserstest_that("non-syntactic grouping variable is preserved (#1138)", { df <- tibble(`a b` = 1L) %>% group_by(`a b`) %>% transmute() expect_named(df, "a b") }) test_that("transmute preserves grouping", { gf <- group_by(tibble(x = 1:2, y = 2), x) i <- count_regroups(out <- transmute(gf, x = 1)) expect_equal(i, 1L) expect_equal(group_vars(out), "x") expect_equal(nrow(group_data(out)), 1) i <- count_regroups(out <- transmute(gf, z = 1)) expect_equal(i, 0) expect_equal(group_data(out), group_data(gf)) }) # Empty transmutes ------------------------------------------------- test_that("transmute with no args returns grouping vars", { df <- tibble(x = 1, y = 2) gf <- group_by(df, x) expect_equal(df %>% transmute(), df[integer()]) expect_equal(gf %>% transmute(), gf[1L]) }) # transmute variables ----------------------------------------------- test_that("transmute succeeds in presence of raw columns (#1803)", { df <- tibble(a = 1:3, b = as.raw(1:3)) expect_identical(transmute(df, a), df["a"]) expect_identical(transmute(df, b), df["b"]) }) test_that("arguments to transmute() don't match vars_transmute() arguments", { df <- tibble(a = 1) 
expect_identical(transmute(df, var = a), tibble(var = 1)) expect_identical(transmute(df, exclude = a), tibble(exclude = 1)) expect_identical(transmute(df, include = a), tibble(include = 1)) }) test_that("arguments to rename() don't match vars_rename() arguments (#2861)", { df <- tibble(a = 1) expect_identical(rename(df, var = a), tibble(var = 1)) expect_identical(rename(group_by(df, a), var = a), group_by(tibble(var = 1), var)) expect_identical(rename(df, strict = a), tibble(strict = 1)) expect_identical(rename(group_by(df, a), strict = a), group_by(tibble(strict = 1), strict)) }) test_that("can transmute() with .data pronoun (#2715)", { expect_identical(transmute(mtcars, .data$cyl), transmute(mtcars, cyl)) }) test_that("transmute() does not warn when a variable is removed with = NULL (#4609)", { df <- data.frame(x=1) expect_warning(transmute(df, y =x+1, z=y*2, y = NULL), NA) }) test_that("transmute() can handle auto splicing", { expect_equal( iris %>% transmute(tibble(Sepal.Length, Sepal.Width)), iris %>% select(Sepal.Length, Sepal.Width) ) }) test_that("transmute() retains ordering supplied in `...`, even for pre-existing columns (#6086)", { df <- tibble(x = 1:3, y = 4:6) out <- transmute(df, x, z = x + 1, y) expect_named(out, c("x", "z", "y")) }) test_that("transmute() retains ordering supplied in `...`, even for group columns (#6086)", { df <- tibble(x = 1:3, g1 = 1:3, g2 = 1:3, y = 4:6) df <- group_by(df, g1, g2) out <- transmute(df, x, z = x + 1, y, g1) # - Untouched group variables are first # - Following by ordering supplied through `...` expect_named(out, c("g2", "x", "z", "y", "g1")) }) test_that("transmute() error messages", { expect_snapshot({ (expect_error(transmute(mtcars, cyl2 = cyl, .keep = 'all'))) (expect_error(transmute(mtcars, cyl2 = cyl, .before = disp))) (expect_error(transmute(mtcars, cyl2 = cyl, .after = disp))) }) }) dplyr/tests/testthat/test-mutate.R0000644000176200001440000006107114406402754017020 0ustar liggesuserstest_that("empty mutate 
returns input", { df <- tibble(x = 1) gf <- group_by(df, x) expect_equal(mutate(df), df) expect_equal(mutate(df, .by = x), df) expect_equal(mutate(gf), gf) expect_equal(mutate(df, !!!list()), df) expect_equal(mutate(df, !!!list(), .by = x), df) expect_equal(mutate(gf, !!!list()), gf) }) test_that("rownames preserved", { df <- data.frame(x = c(1, 2), row.names = c("a", "b")) df <- mutate(df, y = 2) expect_equal(row.names(df), c("a", "b")) df <- mutate(df, y = 2, .by = x) expect_equal(row.names(df), c("a", "b")) }) test_that("mutations applied progressively", { df <- tibble(x = 1) expect_equal(df %>% mutate(y = x + 1, z = y + 1), tibble(x = 1, y = 2, z = 3)) expect_equal(df %>% mutate(x = x + 1, x = x + 1), tibble(x = 3)) expect_equal(df %>% mutate(x = 2, y = x), tibble(x = 2, y = 2)) df <- data.frame(x = 1, y = 2) expect_equal( df %>% mutate(x2 = x, x3 = x2 + 1), df %>% mutate(x2 = x + 0, x3 = x2 + 1) ) }) test_that("length-1 vectors are recycled (#152)", { df <- tibble(x = 1:4) expect_equal(mutate(df, y = 1)$y, rep(1, 4)) expect_error(mutate(df, y = 1:2)) }) test_that("can remove variables with NULL (#462)", { df <- tibble(x = 1:3, y = 1:3) gf <- group_by(df, x) expect_equal(df %>% mutate(y = NULL), df[1]) expect_equal(gf %>% mutate(y = NULL), gf[1]) # even if it doesn't exist expect_equal(df %>% mutate(z = NULL), df) # or was just created expect_equal(df %>% mutate(z = 1, z = NULL), df) # regression test for https://github.com/tidyverse/dplyr/issues/4974 expect_equal( mutate(data.frame(x = 1, y = 1), z = 1, x = NULL, y = NULL), data.frame(z = 1) ) }) test_that("mutate() names pronouns correctly (#2686)", { expect_named(mutate(tibble(x = 1), .data$x), "x") expect_named(mutate(tibble(x = 1), .data[["x"]]), "x") }) test_that("mutate() supports unquoted values", { df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) expect_identical(mutate(df, out = !!1), mutate(df, out = 1)) expect_identical(mutate(df, out = !!(1:5)), mutate(df, out = 1:5)) expect_identical(mutate(df, out = 
!!quote(1:5)), mutate(df, out = 1:5)) gdf <- group_by(df, g) expect_identical(mutate(gdf, out = !!1), mutate(gdf, out = 1)) }) test_that("assignments don't overwrite variables (#315)", { df <- tibble(x = 1, y = 2) out <- df %>% mutate(z = {x <- 10; x}) expect_equal(out, tibble(x = 1, y = 2, z = 10)) }) test_that("can mutate a data frame with zero columns", { df <- new_data_frame(n = 2L) expect_equal(mutate(df, x = 1), data.frame(x = c(1, 1))) }) test_that("mutate() handles symbol expressions", { df <- tibble(x = structure(1, class = "alien")) res <- mutate(df, y = x) expect_identical(df$x, res$y) gf <- group_by(df, x) res <- mutate(df, y = x) expect_identical(df$x, res$y) }) test_that("mutate() supports constants (#6056, #6305)", { df <- data.frame(x = 1:10, g = rep(1:2, each = 5)) y <- 1:10 z <- 1:5 expect_identical(df %>% mutate(y = !!y) %>% pull(y), y) expect_identical(df %>% group_by(g) %>% mutate(y = !!y) %>% pull(y), y) expect_identical(df %>% rowwise() %>% mutate(y = !!y) %>% pull(y), y) expect_snapshot({ (expect_error(df %>% mutate(z = !!z))) (expect_error(df %>% group_by(g) %>% mutate(z = !!z))) (expect_error(df %>% rowwise() %>% mutate(z = !!z))) }) # `.env$` is used for per group evaluation expect_identical(df %>% mutate(y = .env$y) %>% pull(y), y) expect_identical(df %>% group_by(g) %>% mutate(z = .env$z) %>% pull(z), c(z, z)) expect_snapshot({ (expect_error(df %>% group_by(g) %>% mutate(y = .env$y))) (expect_error(df %>% rowwise() %>% mutate(y = .env$y))) }) }) test_that("can't overwrite column active bindings (#6666)", { skip_if(getRversion() < "3.6.3", message = "Active binding error changed") df <- tibble(g = 1:2, x = 3:4) gdf <- group_by(df, g) # The error seen here comes from trying to `<-` to an active binding when # the active binding function has 0 arguments. 
expect_snapshot(error = TRUE, { mutate(df, y = { x <<- 2 x }) }) expect_snapshot(error = TRUE, { mutate(df, .by = g, y = { x <<- 2 x }) }) expect_snapshot(error = TRUE, { mutate(gdf, y = { x <<- 2 x }) }) }) test_that("assigning with `<-` doesn't affect the mask (#6666)", { df <- tibble(g = 1:2, x = 3:4) gdf <- group_by(df, g) out <- mutate(df, .by = g, y = { x <- x + 2L x }) expect_identical(out$x, c(3L, 4L)) expect_identical(out$y, c(5L, 6L)) out <- mutate(gdf, y = { x <- x + 2L x }) expect_identical(out$x, c(3L, 4L)) expect_identical(out$y, c(5L, 6L)) }) test_that("`across()` inline expansions that use `<-` don't affect the mask (#6666)", { df <- tibble(g = 1:2, x = 3:4) out <- df %>% mutate( across(x, function(col) { col <- col + 2L col }), .by = g ) expect_identical(out$x, c(5L, 6L)) }) test_that("can't share local variables across expressions (#6666)", { df <- tibble(x = 1:2, y = 3:4) expect_snapshot(error = TRUE, { mutate( df, x2 = { foo <- x x }, y2 = { foo } ) }) }) # column types ------------------------------------------------------------ test_that("glue() is supported", { expect_equal( tibble(x = 1) %>% mutate(y = glue("")), tibble(x = 1, y = glue("")) ) }) test_that("mutate disambiguates NA and NaN (#1448)", { df <- tibble(x = c(1, NA, NaN)) out <- mutate(df, y = x * 1) expect_equal(out$y, df$x) }) test_that("mutate preserves names (#1689, #2675)", { df <- tibble(a = 1:3) out1 <- df %>% mutate(b = setNames(1:3, letters[1:3])) out2 <- df %>% mutate(b = setNames(as.list(1:3), letters[1:3])) expect_named(out1$b, letters[1:3]) expect_named(out2$b, letters[1:3]) }) test_that("mutate handles matrix columns", { df <- data.frame(a = rep(1:3, each = 2), b = 1:6) df_regular <- mutate(df, b = scale(b)) df_grouped <- mutate(group_by(df, a), b = scale(b)) df_rowwise <- mutate(rowwise(df), b = scale(b)) expect_equal(dim(df_regular$b), c(6, 1)) expect_equal(dim(df_grouped$b), c(6, 1)) expect_equal(dim(df_rowwise$b), c(6, 1)) }) test_that("mutate handles data frame 
columns", { df <- data.frame("a" = c(1, 2, 3), "b" = c(2, 3, 4), "base_col" = c(3, 4, 5)) res <- mutate(df, new_col = data.frame(x = 1:3)) expect_equal(res$new_col, data.frame(x = 1:3)) res <- mutate(group_by(df, a), new_col = data.frame(x = a)) expect_equal(res$new_col, data.frame(x = 1:3)) res <- mutate(rowwise(df), new_col = data.frame(x = a)) expect_equal(res$new_col, data.frame(x = 1:3)) }) test_that("unnamed data frames are automatically unspliced (#2326, #3630)", { expect_identical( tibble(a = 1) %>% mutate(tibble(b = 2)), tibble(a = 1, b = 2) ) expect_identical( tibble(a = 1) %>% mutate(tibble(b = 2), tibble(b = 3)), tibble(a = 1, b = 3) ) expect_identical( tibble(a = 1) %>% mutate(tibble(b = 2), c = b), tibble(a = 1, b = 2, c = 2) ) }) test_that("named data frames are packed (#2326, #3630)", { df <- tibble(x = 1) out <- df %>% mutate(y = tibble(a = x)) expect_equal(out, tibble(x = 1, y = tibble(a = 1))) }) test_that("unchop only called for when multiple groups", { df <- data.frame(g = 1, x = 1:5) out <- mutate(df, x = ts(x, start = c(1971, 1), frequency = 52)) expect_s3_class(out$x, "ts") gdf <- group_by(df, g) out <- mutate(gdf, x = ts(x, start = c(1971, 1), frequency = 52)) expect_s3_class(out$x, "ts") }) # output types ------------------------------------------------------------ test_that("mutate preserves grouping", { gf <- group_by(tibble(x = 1:2, y = 2), x) i <- count_regroups(out <- mutate(gf, x = 1)) expect_equal(i, 1L) expect_equal(group_vars(out), "x") expect_equal(nrow(group_data(out)), 1) i <- count_regroups(out <- mutate(gf, z = 1)) expect_equal(i, 0) expect_equal(group_data(out), group_data(gf)) }) test_that("mutate works on zero-row grouped data frame (#596)", { dat <- data.frame(a = numeric(0), b = character(0), stringsAsFactors = TRUE) res <- dat %>% group_by(b, .drop = FALSE) %>% mutate(a2 = a * 2) expect_type(res$a2, "double") expect_s3_class(res, "grouped_df") expect_equal(res$a2, numeric(0)) expect_type(group_rows(res), "list") 
expect_equal(attr(group_rows(res), "ptype"), integer()) expect_equal(group_data(res)$b, factor(character(0))) }) test_that("mutate preserves class of zero-row rowwise (#4224, #6303)", { # Each case needs to test both x and identity(x) because these flow # through two slightly different pathways. rf <- rowwise(tibble(x = character(0))) out <- mutate(rf, x2 = identity(x), x3 = x) expect_equal(out$x2, character()) expect_equal(out$x3, character()) # including list-of classes of list-cols where possible rf <- rowwise(tibble(x = list_of(.ptype = character()))) out <- mutate(rf, x2 = identity(x), x3 = x) expect_equal(out$x2, character()) expect_equal(out$x3, character()) # an empty list is turns into a logical (aka unspecified) rf <- rowwise(tibble(x = list())) out <- mutate(rf, x2 = identity(x), x3 = x) expect_equal(out$x2, logical()) expect_equal(out$x3, logical()) }) test_that("mutate works on empty data frames (#1142)", { df <- data.frame() res <- df %>% mutate() expect_equal(nrow(res), 0L) expect_equal(length(res), 0L) res <- df %>% mutate(x = numeric()) expect_equal(names(res), "x") expect_equal(nrow(res), 0L) expect_equal(length(res), 1L) }) test_that("mutate handles 0 rows rowwise (#1300)", { res <- tibble(y = character()) %>% rowwise() %>% mutate(z = 1) expect_equal(nrow(res), 0L) }) test_that("rowwise mutate gives expected results (#1381)", { f <- function(x) ifelse(x < 2, NA_real_, x) res <- tibble(x = 1:3) %>% rowwise() %>% mutate(y = f(x)) expect_equal(res$y, c(NA, 2, 3)) }) test_that("rowwise mutate un-lists existing size-1 list-columns (#6302)", { # Existing column rf <- rowwise(tibble(x = as.list(1:3))) out <- mutate(rf, y = x) expect_equal(out$y, 1:3) # New column rf <- rowwise(tibble(x = 1:3)) out <- mutate(rf, y = list(1), z = y) expect_identical(out$z, c(1, 1, 1)) # Column of data 1-row data frames rf <- rowwise(tibble(x = list(tibble(a = 1), tibble(a = 2)))) out <- mutate(rf, y = x) expect_identical(out$y, tibble(a = c(1, 2))) # Preserves known 
list-of type rf <- rowwise(tibble(x = list_of(.ptype = character()))) out <- mutate(rf, y = x) expect_identical(out$y, character()) # Errors if it's not a length-1 list df <- rowwise(tibble(x = list(1, 2:3))) expect_snapshot(mutate(df, y = x), error = TRUE) }) test_that("grouped mutate does not drop grouping attributes (#1020)", { d <- data.frame(subject = c("Jack", "Jill"), id = c(2, 1)) %>% group_by(subject) a1 <- names(attributes(d)) a2 <- names(attributes(d %>% mutate(foo = 1))) expect_equal(setdiff(a1, a2), character(0)) }) test_that("mutate() hands list columns with rowwise magic to follow up expressions (#4845)", { test <- rowwise(tibble(x = 1:2)) expect_identical( test %>% mutate(a = list(1)) %>% mutate(b = list(a + 1)), test %>% mutate(a = list(1), b = list(a + 1)) ) }) test_that("mutate keeps zero length groups", { df <- tibble( e = 1, f = factor(c(1, 1, 2, 2), levels = 1:3), g = c(1, 1, 2, 2), x = c(1, 2, 1, 4) ) df <- group_by(df, e, f, g, .drop = FALSE) expect_equal( group_size(mutate(df, z = 2)), c(2, 2, 0) ) }) # other ------------------------------------------------------------------- test_that("no utf8 invasion (#722)", { skip_if_not(l10n_info()$"UTF-8") skip_if_not_installed("lobstr") source("utf-8.txt", local = TRUE, encoding = "UTF-8") }) test_that("mutate() to UTF-8 column names", { df <- tibble(a = 1) %>% mutate("\u5e78" := a) expect_equal(colnames(df), c("a", "\u5e78")) }) test_that("Non-ascii column names in version 0.3 are not duplicated (#636)", { local_non_utf8_encoding() df <- tibble(a = "1", b = "2") names(df) <- c("a", enc2native("\u4e2d")) res <- df %>% mutate_all(as.numeric) expect_equal(names(res), as_utf8_character(names(df))) }) test_that("mutate coerces results from one group with all NA values (#1463) ", { df <- tibble(x = c(1, 2), y = c(1, NA)) res <- df %>% group_by(x) %>% mutate(z = ifelse(y > 1, 1, 2)) expect_true(is.na(res$z[2])) expect_type(res$z, "double") }) test_that("grouped subsets are not lazy (#3360)", { make_call 
<- function(x) { quo(!!x) } res <- tibble(name = 1:2, value = letters[1:2]) %>% rowwise() %>% mutate(call = list(make_call(value))) %>% pull() expect_identical(res, list(make_call("a"), make_call("b"))) res <- tibble(name = 1:2, value = letters[1:2]) %>% group_by(name) %>% mutate(call = list(make_call(value))) %>% pull() expect_identical(res, list(make_call("a"), make_call("b"))) }) test_that("mutate() evaluates expression for empty groups", { df <- tibble(f = factor(c("a", "b"), levels = c("a", "b", "c"))) gf <- group_by(df, f, .drop = FALSE) count <- 0 mutate(gf, x = {count <<- count + 1}) expect_equal(count, 3L) }) test_that("DataMask$add() forces chunks (#4677)", { df <- tibble(bf10 = 0.244) %>% mutate( bf01 = 1 / bf10, log_e_bf10 = log(bf10), log_e_bf01 = log(bf01) ) expect_equal(df$log_e_bf01, log(1 / 0.244)) }) test_that("DataMask uses fresh copies of group id / size variables (#6762)", { df <- tibble(x = 1:2) fn <- function() { df <- tibble(a = 1) # Otherwise, this nested `mutate()` can modify the same # id/size variable as the outer one, which causes havoc mutate(df, b = a + 1) } out <- mutate(df, y = {fn(); x}) expect_identical(out$x, 1:2) expect_identical(out$y, 1:2) }) test_that("mutate() correctly auto-names expressions (#6741)", { df <- tibble(a = 1L) expect_identical(mutate(df, -a), tibble(a = 1L, "-a" = -1L)) foo <- "foobar" expect_identical(mutate(df, foo), tibble(a = 1L, foo = "foobar")) a <- 2L expect_identical(mutate(df, a), tibble(a = 1L)) df <- tibble(a = 1L, "a + 1" = 5L) a <- 2L expect_identical(mutate(df, a + 1), tibble(a = 1L, "a + 1" = 2)) }) # .by ------------------------------------------------------------------------- test_that("can group transiently using `.by`", { df <- tibble(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3)) out <- mutate(df, x = mean(x), .by = g) expect_identical(out$g, df$g) expect_identical(out$x, c(3, 3, 2, 3, 2)) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping retains bare data.frame 
class", { df <- data.frame(g = c(1, 1, 2, 1, 2), x = c(5, 2, 1, 2, 3)) out <- mutate(df, x = mean(x), .by = g) expect_s3_class(out, class(df), exact = TRUE) }) test_that("transient grouping retains data frame attributes (#6100)", { # With data.frames or tibbles df <- data.frame(g = c(1, 1, 2), x = c(1, 2, 1)) tbl <- as_tibble(df) attr(df, "foo") <- "bar" attr(tbl, "foo") <- "bar" out <- mutate(df, x = mean(x), .by = g) expect_identical(attr(out, "foo"), "bar") out <- mutate(tbl, x = mean(x), .by = g) expect_identical(attr(out, "foo"), "bar") }) test_that("can `NULL` out the `.by` column", { df <- tibble(x = 1:3) expect_identical( mutate(df, x = NULL, .by = x), new_tibble(list(), nrow = 3) ) }) test_that("catches `.by` with grouped-df", { df <- tibble(x = 1) gdf <- group_by(df, x) expect_snapshot(error = TRUE, { mutate(gdf, .by = x) }) }) test_that("catches `.by` with rowwise-df", { df <- tibble(x = 1) rdf <- rowwise(df) expect_snapshot(error = TRUE, { mutate(rdf, .by = x) }) }) # .before, .after, .keep ------------------------------------------------------ test_that(".keep = 'unused' keeps variables explicitly mentioned", { df <- tibble(x = 1, y = 2) out <- mutate(df, x1 = x + 1, y = y, .keep = "unused") expect_named(out, c("y", "x1")) }) test_that(".keep = 'used' not affected by across() or pick()", { df <- tibble(x = 1, y = 2, z = 3, a = "a", b = "b", c = "c") # This must evaluate every column in order to figure out if should # be included in the set or not, but that shouldn't be counted for # the purposes of "used" variables out <- mutate(df, across(where(is.numeric), identity), .keep = "unused") expect_named(out, names(df)) out <- mutate(df, pick(where(is.numeric)), .keep = "unused") expect_named(out, names(df)) }) test_that(".keep = 'used' keeps variables used in expressions", { df <- tibble(a = 1, b = 2, c = 3, x = 1, y = 2) out <- mutate(df, xy = x + y, .keep = "used") expect_named(out, c("x", "y", "xy")) }) test_that(".keep = 'none' only keeps grouping 
variables", { df <- tibble(x = 1, y = 2) gf <- group_by(df, x) expect_named(mutate(df, z = 1, .keep = "none"), "z") expect_named(mutate(gf, z = 1, .keep = "none"), c("x", "z")) }) test_that(".keep = 'none' retains original ordering (#5967)", { df <- tibble(x = 1, y = 2) expect_named(df %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y")) # even when grouped gf <- group_by(df, x) expect_named(gf %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y")) }) test_that("can use .before and .after to control column position", { df <- tibble(x = 1, y = 2) expect_named(mutate(df, z = 1), c("x", "y", "z")) expect_named(mutate(df, z = 1, .before = 1), c("z", "x", "y")) expect_named(mutate(df, z = 1, .after = 1), c("x", "z", "y")) # but doesn't affect order of existing columns df <- tibble(x = 1, y = 2) expect_named(mutate(df, x = 1, .after = y), c("x", "y")) }) test_that("attributes of bare data frames are retained when `.before` and `.after` are used (#6341)", { # We require `[` methods to be in charge of keeping extra attributes for all # data frame subclasses (except for data.tables) df <- vctrs::data_frame(x = 1, y = 2) attr(df, "foo") <- "bar" out <- mutate(df, z = 3, .before = x) expect_identical(attr(out, "foo"), "bar") }) test_that(".keep and .before/.after interact correctly", { df <- tibble(x = 1, y = 1, z = 1, a = 1, b = 2, c = 3) %>% group_by(a, b) expect_named(mutate(df, d = 1, x = 2, .keep = "none"), c("x", "a", "b", "d")) expect_named(mutate(df, d = 1, x = 2, .keep = "none", .before = "a"), c("x", "d", "a", "b")) expect_named(mutate(df, d = 1, x = 2, .keep = "none", .after = "a"), c("x", "a", "d", "b")) }) test_that("dropping column with `NULL` then readding it retains original location", { df <- tibble(x = 1, y = 2, z = 3, a = 4) df <- group_by(df, z) expect_named(mutate(df, y = NULL, y = 3, .keep = "all"), c("x", "y", "z", "a")) expect_named(mutate(df, b = a, y = NULL, y = 3, .keep = "used"), c("y", "z", "a", "b")) expect_named(mutate(df, b = a, y = NULL, y = 
3, .keep = "unused"), c("x", "y", "z", "b")) # It isn't treated as a "new" column expect_named(mutate(df, y = NULL, y = 3, .keep = "all", .before = x), c("x", "y", "z", "a")) }) test_that("setting a new column to `NULL` works with `.before` and `.after` (#6563)", { df <- tibble(x = 1, y = 2, z = 3, a = 4) expect_named(mutate(df, b = NULL, .before = 1), names(df)) expect_named(mutate(df, b = 1, b = NULL, .before = 1), names(df)) expect_named(mutate(df, b = NULL, b = 1, .before = 1), c("b", "x", "y", "z", "a")) expect_named(mutate(df, b = NULL, c = 1, .after = 2), c("x", "y", "c", "z", "a")) }) test_that(".keep= always retains grouping variables (#5582)", { df <- tibble(x = 1, y = 2, z = 3) %>% group_by(z) expect_equal( df %>% mutate(a = x + 1, .keep = "none"), tibble(z = 3, a = 2) %>% group_by(z) ) expect_equal( df %>% mutate(a = x + 1, .keep = "all"), tibble(x = 1, y = 2, z = 3, a = 2) %>% group_by(z) ) expect_equal( df %>% mutate(a = x + 1, .keep = "used"), tibble(x = 1, z = 3, a = 2) %>% group_by(z) ) expect_equal( df %>% mutate(a = x + 1, .keep = "unused"), tibble(y = 2, z = 3, a = 2) %>% group_by(z) ) }) test_that("mutate() preserves the call stack on error (#5308)", { foobar <- function() stop("foo") stack <- NULL expect_error( withCallingHandlers( error = function(...) 
stack <<- sys.calls(), mutate(mtcars, foobar()) ) ) expect_true(some(stack, is_call, "foobar")) }) test_that("dplyr data mask can become obsolete", { lazy <- function(x) { list(enquo(x)) } df <- tibble( x = 1:2 ) res <- df %>% rowwise() %>% mutate(y = lazy(x), .keep = "unused") expect_equal(names(res), c("x", "y")) expect_error(eval_tidy(res$y[[1]])) }) test_that("mutate() deals with 0 groups (#5534)", { df <- data.frame(x = numeric()) %>% group_by(x) expect_equal( mutate(df, y = x + 1), data.frame(x = numeric(), y = numeric()) %>% group_by(x) ) expect_snapshot({ mutate(df, y = max(x)) }) }) test_that("functions are not skipped in data pronoun (#5608)", { f <- function(i) i + 1 df <- tibble(a = list(f), b = 1) two <- df %>% rowwise() %>% mutate(res = .data$a(.data$b)) %>% pull(res) expect_equal(two, 2) }) test_that("mutate() casts data frame results to common type (#5646)", { df <- data.frame(x = 1:2, g = 1:2) %>% group_by(g) res <- df %>% mutate(if (g == 1) data.frame(y = 1) else data.frame(y = 1, z = 2)) expect_equal(res$z, c(NA, 2)) }) test_that("mutate() supports empty list columns in rowwise data frames (#5804", { res <- tibble(a = list()) %>% rowwise() %>% mutate(n = lengths(a)) expect_equal(res$n, integer()) }) test_that("mutate() fails on named empty arguments (#5925)", { expect_error( mutate(tibble(), bogus = ) ) }) # Error messages ---------------------------------------------------------- test_that("mutate() give meaningful errors", { expect_snapshot({ tbl <- tibble(x = 1:2, y = 1:2) # setting column to NULL makes it unavailable (expect_error(tbl %>% mutate(y = NULL, a = sum(y)))) (expect_error(tbl %>% group_by(x) %>% mutate(y = NULL, a = sum(y)) )) # incompatible column type (expect_error(tibble(x = 1) %>% mutate(y = mean))) # Unsupported type" df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5) (expect_error(df %>% mutate(out = env(a = 1)))) (expect_error(df %>% group_by(g) %>% mutate(out = env(a = 1)) )) (expect_error(df %>% rowwise() %>% mutate(out = rnorm) 
)) # incompatible types across groups (expect_error( data.frame(x = rep(1:5, each = 3)) %>% group_by(x) %>% mutate(val = ifelse(x < 3, "foo", 2)) )) # mixed nulls (expect_error( tibble(a = 1:3, b=4:6) %>% group_by(a) %>% mutate(if(a==1) NULL else "foo") )) (expect_error( tibble(a = 1:3, b=4:6) %>% group_by(a) %>% mutate(if(a==2) NULL else "foo") )) # incompatible size (expect_error( data.frame(x = c(2, 2, 3, 3)) %>% mutate(int = 1:5) )) (expect_error( data.frame(x = c(2, 2, 3, 3)) %>% group_by(x) %>% mutate(int = 1:5) )) (expect_error( data.frame(x = c(2, 3, 3)) %>% group_by(x) %>% mutate(int = 1:5) )) (expect_error( data.frame(x = c(2, 2, 3, 3)) %>% rowwise() %>% mutate(int = 1:5) )) (expect_error( tibble(y = list(1:3, "a")) %>% rowwise() %>% mutate(y2 = y) )) (expect_error( data.frame(x = 1:10) %>% mutate(y = 11:20, y = 1:2) )) # .data pronoun (expect_error( tibble(a = 1) %>% mutate(c = .data$b) )) (expect_error( tibble(a = 1:3) %>% group_by(a) %>% mutate(c = .data$b) )) # obsolete data mask lazy <- function(x) list(enquo(x)) res <- tbl %>% rowwise() %>% mutate(z = lazy(x), .keep = "unused") (expect_error( eval_tidy(res$z[[1]]) )) # Error that contains { (expect_error( tibble() %>% mutate(stop("{")) )) }) }) test_that("mutate() errors refer to expressions if not named", { expect_snapshot({ (expect_error(mutate(mtcars, 1:3))) (expect_error(mutate(group_by(mtcars, cyl), 1:3))) }) }) test_that("`mutate()` doesn't allow data frames with missing or empty names (#6758)", { df1 <- new_data_frame(set_names(list(1), "")) df2 <- new_data_frame(set_names(list(1), NA_character_)) expect_snapshot(error = TRUE, { mutate(df1) }) expect_snapshot(error = TRUE, { mutate(df2) }) }) dplyr/tests/testthat/test-case-when.R0000644000176200001440000001420214406402754017365 0ustar liggesuserstest_that("matches values in order", { x <- 1:3 expect_equal( case_when( x <= 1 ~ 1, x <= 2 ~ 2, x <= 3 ~ 3 ), c(1, 2, 3) ) }) test_that("unmatched gets missing value", { x <- 1:3 expect_equal( 
case_when( x <= 1 ~ 1, x <= 2 ~ 2 ), c(1, 2, NA) ) }) test_that("missing values can be replaced (#1999)", { x <- c(1:3, NA) expect_equal( case_when( x <= 1 ~ 1, x <= 2 ~ 2, is.na(x) ~ 0 ), c(1, 2, NA, 0) ) }) test_that("NA conditions (#2927)", { expect_equal( case_when( c(TRUE, FALSE, NA) ~ 1:3, TRUE ~ 4L ), c(1L, 4L, 4L) ) }) test_that("any `TRUE` overrides an `NA`", { x <- c(1, 2, NA, 3) expect <- c("one", "not_one", "missing", "not_one") # `TRUE` overriding before the `NA` expect_identical( case_when( is.na(x) ~ "missing", x == 1 ~ "one", .default = "not_one" ), expect ) # `TRUE` overriding after the `NA` expect_identical( case_when( x == 1 ~ "one", is.na(x) ~ "missing", .default = "not_one" ), expect ) }) test_that("atomic conditions (#2909)", { expect_equal( case_when( TRUE ~ 1:3, FALSE ~ 4:6 ), 1:3 ) expect_equal( case_when( NA ~ 1:3, TRUE ~ 4:6 ), 4:6 ) }) test_that("zero-length conditions and values (#3041)", { expect_equal( case_when( TRUE ~ integer(), FALSE ~ integer() ), integer() ) expect_equal( case_when( logical() ~ 1, logical() ~ 2 ), numeric() ) }) test_that("case_when can be used in anonymous functions (#3422)", { res <- tibble(a = 1:3) %>% mutate(b = (function(x) case_when(x < 2 ~ TRUE, .default = FALSE))(a)) %>% pull() expect_equal(res, c(TRUE, FALSE, FALSE)) }) test_that("case_when() can be used inside mutate()", { out <- mtcars[1:4, ] %>% mutate(out = case_when( cyl == 4 ~ 1, .data[["am"]] == 1 ~ 2, .default = 0 )) %>% pull() expect_identical(out, c(2, 2, 1, 0)) }) test_that("case_when() accepts logical conditions with attributes (#6678)", { x <- structure(c(FALSE, TRUE), label = "foo") expect_identical(case_when(x ~ 1, .default = 2), c(2, 1)) }) test_that("can pass quosures to case_when()", { fs <- local({ x <- 3:1 quos( x < 2 ~ TRUE, TRUE ~ FALSE ) }) expect_identical(case_when(!!!fs), c(FALSE, FALSE, TRUE)) }) test_that("can pass nested quosures to case_when()", { fs <- local({ foo <- mtcars$cyl[1:4] quos( !!quo(foo) == 4 ~ 1, TRUE ~ 0 ) }) 
expect_identical(case_when(!!!fs), c(0, 0, 1, 0)) }) test_that("can pass unevaluated formulas to case_when()", { x <- 6:8 fs <- exprs( x == 7L ~ TRUE, TRUE ~ FALSE ) expect_identical(case_when(!!!fs), c(FALSE, TRUE, FALSE)) out <- local({ x <- 7:9 case_when(!!!fs) }) expect_identical(out, c(TRUE, FALSE, FALSE)) }) test_that("unevaluated formulas can refer to data mask", { fs <- exprs( cyl == 4 ~ 1, am == 1 ~ 2, TRUE ~ 0 ) out <- mtcars[1:4, ] %>% mutate(out = case_when(!!!fs)) %>% pull() expect_identical(out, c(2, 2, 1, 0)) }) test_that("unevaluated formulas can contain quosures", { quo <- local({ n <- 4 quo(n) }) fs <- exprs( cyl == !!quo ~ 1, am == 1 ~ 2, TRUE ~ 0 ) out <- mtcars[1:4, ] %>% mutate(out = case_when(!!!fs)) %>% pull() expect_identical(out, c(2, 2, 1, 0)) }) test_that("NULL inputs are compacted", { x <- 1:3 bool <- FALSE out <- case_when( x == 2 ~ TRUE, if (bool) x == 3 ~ NA, .default = FALSE ) expect_identical(out, c(FALSE, TRUE, FALSE)) bool <- TRUE out <- case_when( x == 2 ~ TRUE, if (bool) x == 3 ~ NA, .default = FALSE ) expect_identical(out, c(FALSE, TRUE, NA)) }) test_that("passes through `.default` correctly", { expect_identical(case_when(FALSE ~ 1, .default = 2), 2) expect_identical(case_when(FALSE ~ 1:5, .default = 2), rep(2, 5)) expect_identical(case_when(FALSE ~ 1:5, .default = 2:6), 2:6) }) test_that("`.default` isn't part of recycling", { # Because eventually we want to only take the output size from the LHS conditions, # so having `.default` participate in the common size is a step in the wrong # direction expect_snapshot(error = TRUE, { case_when(FALSE ~ 1L, .default = 2:5) }) }) test_that("`.default` is part of common type computation", { expect_identical(case_when(TRUE ~ 1L, .default = 2), 1) expect_snapshot(error = TRUE, { case_when(TRUE ~ 1L, .default = "x") }) }) test_that("passes through `.ptype` correctly", { expect_identical(case_when(TRUE ~ 1, .ptype = integer()), 1L) }) test_that("passes through `.size` correctly", { 
expect_identical(case_when(TRUE ~ 1, .size = 2), c(1, 1)) expect_snapshot(error = TRUE, { case_when(TRUE ~ 1:2, .size = 3) }) }) # Errors ------------------------------------------------------------------ test_that("invalid type errors are correct (#6261) (#6206)", { expect_snapshot(error = TRUE, { case_when(TRUE ~ 1, TRUE ~ "x") }) }) test_that("`NULL` formula element throws meaningful error", { expect_snapshot(error = TRUE, { case_when(1 ~ NULL) }) expect_snapshot(error = TRUE, { case_when(NULL ~ 1) }) }) test_that("throws chained errors when formula evaluation fails", { expect_snapshot(error = TRUE, { case_when(1 ~ 2, 3 ~ stop("oh no!")) }) expect_snapshot(error = TRUE, { case_when(1 ~ 2, stop("oh no!") ~ 4) }) }) test_that("case_when() give meaningful errors", { expect_snapshot({ (expect_error( case_when( c(TRUE, FALSE) ~ 1:3, c(FALSE, TRUE) ~ 1:2 ) )) (expect_error( case_when( c(TRUE, FALSE) ~ 1, c(FALSE, TRUE, FALSE) ~ 2, c(FALSE, TRUE, FALSE, NA) ~ 3 ) )) (expect_error( case_when(50 ~ 1:3) )) (expect_error( case_when(paste(50)) )) (expect_error( case_when(y ~ x, paste(50)) )) (expect_error( case_when() )) (expect_error( case_when(NULL) )) (expect_error( case_when(~1:2) )) }) }) dplyr/tests/testthat/helper-s3.R0000644000176200001440000000232313663515257016350 0ustar liggesuserslocal_methods <- function(..., .frame = caller_env()) { local_bindings(..., .env = global_env(), .frame = .frame) } local_foo_df <- function(frame = caller_env()) { local_methods(.frame = frame, group_by.foo_df = function(.data, ...) { out <- NextMethod() if (missing(...)) { class(out) <- c("foo_df", class(out)) } else { class(out) <- c("grouped_foo_df", class(out)) } out }, ungroup.grouped_foo_df = function(x, ...) 
{ out <- NextMethod() class(out) <- c("foo_df", class(out)) out } ) } new_ctor <- function(base_class) { function(x = list(), ..., class = NULL) { if (inherits(x, "tbl_df")) { tibble::new_tibble(x, class = c(class, base_class), nrow = nrow(x)) } else if (is.data.frame(x)) { structure(x, class = c(class, base_class, "data.frame"), ...) } else { structure(x, class = c(class, base_class), ...) } } } foobar <- new_ctor("dplyr_foobar") foobaz <- new_ctor("dplyr_foobaz") quux <- new_ctor("dplyr_quux") # For testing reconstructing methods that break invariants by adding # new columns new_dispatched_quux <- function(x) { out <- quux(x) out$dispatched <- rep(TRUE, nrow(out)) out } dplyr/tests/testthat/test-context.R0000644000176200001440000000322314366556340017206 0ustar liggesuserstest_that("cur_group() works", { df <- tibble(g = 1, x = 1) gf <- group_by(df, g) expect_equal( df %>% summarise(key = list(cur_group())) %>% pull(key), list(tibble(.rows = 1L)) ) expect_equal( gf %>% summarise(key = list(cur_group())) %>% pull(key), list(tibble(g = 1)) ) }) test_that("cur_group() works with empty grouped data frame (#6304)", { df <- tibble(x = integer()) gdf <- group_by(df, x) out <- mutate(df, y = cur_group()) expect_identical(out$y, tibble()) out <- mutate(gdf, y = cur_group()) expect_identical(out$y, tibble(x = integer())) }) test_that("cur_group_idx() gives unique id", { df <- tibble(x = c("b", "a", "b")) gf <- group_by(df, x) expect_equal( summarise(gf, id = cur_group_id()), tibble(x = c("a", "b"), id = 1:2) ) expect_equal( mutate(gf, id = cur_group_id()), group_by(tibble(x = df$x, id = c(2, 1, 2)), x) ) }) test_that("cur_group_rows() retrieves row position in original data", { df <- tibble(x = c("b", "a", "b"), y = 1:3) gf <- group_by(df, x) expect_equal( df %>% summarise(x = list(cur_group_rows())) %>% pull(), list(1:3) ) expect_equal( gf %>% summarise(x = list(cur_group_rows())) %>% pull(), list(2L, c(1L, 3L)) ) }) test_that("give useful error messages when not 
applicable", { expect_snapshot({ (expect_error(n())) (expect_error(cur_column())) (expect_error(cur_group())) (expect_error(cur_group_id())) (expect_error(cur_group_rows())) }) }) test_that("group labels are correctly formatted", { expect_snapshot({ group_labels_details(c("a" = 1)) group_labels_details(c("a" = 1, "b" = 2)) }) }) dplyr/tests/testthat/test-copy-to.R0000644000176200001440000000052714406402754017112 0ustar liggesuserstest_that("`auto_copy()` is a no-op when they share the same source", { df1 <- tibble(x = 1) df2 <- tibble(x = 2) expect_identical(auto_copy(df1, df2), df2) }) test_that("`auto_copy()` throws an informative error on different sources (#6798)", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { auto_copy(df, NULL) }) }) dplyr/tests/testthat/test-nest-by.R0000644000176200001440000000165614366556340017113 0ustar liggesuserstest_that("returns expected type/data", { df <- data.frame(g = 1:2, x = 1:2, y = 1:2) out <- nest_by(df, g) expect_s3_class(out, "rowwise_df") expect_equal(group_vars(out), "g") expect_named(out, c("g", "data")) }) test_that("can control key col", { df <- data.frame(g = 1:2, x = 1:2, y = 1:2) out <- nest_by(df, g, .key = "key") expect_named(out, c("g", "key")) }) test_that("nest_by() inherits grouping", { df <- data.frame(g1 = 1:2, g2 = 1:2, x = 1:2, y = 1:2) expect_equal( df %>% group_by(g1) %>% nest_by() %>% group_vars(), "g1" ) # And you can't have it both ways expect_error(df %>% group_by(g1) %>% nest_by("g2"), "re-group") }) test_that("can control whether grouping data in list-col", { df <- data.frame(g = 1:2, x = 1:2, y = 1:2) out <- nest_by(df, g) expect_named(out$data[[1]], c("x", "y")) out <- nest_by(df, g, .keep = TRUE) expect_named(out$data[[1]], c("g", "x", "y")) }) dplyr/tests/testthat/test-deprec-src-local.R0000644000176200001440000000157414366556340020650 0ustar liggesuserstest_that("src_tbls() includes all tbls (#4326)", { withr::local_options(lifecycle_verbosity = "quiet") expect_equal( 
src_tbls(src_df(env = env(. = iris))), "." ) }) test_that("src_local only overwrites if overwrite = TRUE", { withr::local_options(lifecycle_verbosity = "quiet") env <- new.env(parent = emptyenv()) env$x <- 1 src_env <- src_df(env = env) df <- tibble(x = 1) copy_to(src_env, df, name = "x", overwrite = TRUE) expect_equal(env$x, df) }) test_that("src_df() is deprecated / errors", { withr::local_options(lifecycle_verbosity = "quiet") expect_snapshot({ # src_local errs with pkg/env (expect_error(src_df("base", new.env()))) (expect_error(src_df())) env <- new.env(parent = emptyenv()) env$x <- 1 src_env <- src_df(env = env) (expect_error( copy_to(src_env, tibble(x = 1), name = "x") )) }) }) dplyr/tests/testthat/test-if-else.R0000644000176200001440000000526014406402754017043 0ustar liggesuserstest_that("scalar true and false are vectorised", { x <- c(TRUE, TRUE, FALSE, FALSE) expect_equal(if_else(x, 1, 2), c(1, 1, 2, 2)) }) test_that("vector true and false are ok", { x <- c(-1, 0, 1) expect_equal(if_else(x < 0, x, 0), c(-1, 0, 0)) expect_equal(if_else(x > 0, x, 0), c(0, 0, 1)) }) test_that("missing values are missing", { expect_equal(if_else(c(TRUE, NA, FALSE), -1, 1), c(-1, NA, 1)) }) test_that("works with lists", { x <- list(1, 2, 3) expect_equal( if_else(c(TRUE, TRUE, FALSE), x, list(NULL)), list(1, 2, NULL) ) }) test_that("works with data frames", { true <- tibble(x = 1, y = 2) false <- tibble(x = 3, y = 4) expect_identical( if_else(c(TRUE, FALSE, NA, TRUE), true, false), vec_c(true, false, NA, true) ) }) test_that("works with vctrs rcrd types", { true <- new_rcrd(list(x = 1, y = 2)) false <- new_rcrd(list(x = 3, y = 4)) expect_identical( if_else(c(TRUE, FALSE, NA, TRUE), true, false), vec_c(true, false, NA, true) ) }) test_that("takes the common type of `true` and `false` (#6243)", { expect_identical(if_else(TRUE, 1L, 1.5), 1) expect_snapshot(error = TRUE, { if_else(TRUE, 1, "x") }) }) test_that("includes `missing` in the common type computation if used", { 
expect_identical(if_else(TRUE, 1L, 2L, missing = 3), 1) expect_snapshot(error = TRUE, { if_else(TRUE, 1, 2, missing = "x") }) }) test_that("can recycle to size 0 `condition`", { expect_identical(if_else(logical(), 1, 2, missing = 3), double()) }) test_that("accepts logical conditions with attributes (#6678)", { x <- structure(TRUE, label = "foo") expect_identical(if_else(x, 1, 2), 1) }) test_that("`condition` must be logical (and isn't cast to logical!)", { expect_snapshot(error = TRUE, { if_else(1:10, 1, 2) }) }) test_that("`true`, `false`, and `missing` must recycle to the size of `condition`", { x <- 1:3 bad <- 1:2 expect_snapshot(error = TRUE, { if_else(x < 2, bad, x) }) expect_snapshot(error = TRUE, { if_else(x < 2, x, bad) }) expect_snapshot(error = TRUE, { if_else(x < 2, x, x, missing = bad) }) }) test_that("must have empty dots", { expect_snapshot(error = TRUE, { if_else(TRUE, 1, 2, missing = 3, 4) }) }) test_that("`ptype` overrides the common type", { expect_identical(if_else(TRUE, 2, 1L, ptype = integer()), 2L) expect_snapshot(error = TRUE, { if_else(TRUE, 1L, 2.5, ptype = integer()) }) }) test_that("`size` overrides the `condition` size", { expect_identical(if_else(c(TRUE, FALSE), 1, 2, size = 2), c(1, 2)) # Note that `condition` is used as the name in the error message expect_snapshot(error = TRUE, { if_else(TRUE, 1, 2, size = 2) }) }) dplyr/tests/testthat/test-conditions.R0000644000176200001440000000772014366556340017701 0ustar liggesuserstest_that("can pass verb-level error call", { dplyr_local_error_call(call("foo")) expect_snapshot(error = TRUE, { mutate(mtcars, 1 + "") transmute(mtcars, 1 + "") summarise(mtcars, 1 + "") summarise(group_by(mtcars, cyl), 1 + "") filter(mtcars, 1 + "") arrange(mtcars, 1 + "") select(mtcars, 1 + "") slice(mtcars, 1 + "") }) }) test_that("can pass verb-level error call (example case)", { my_verb <- function(data, var1, var2) { dplyr_local_error_call() pull(transmute(data, .result = {{ var1 }} * {{ var2 }})) } 
expect_snapshot(error = TRUE, { my_verb(mtcars, 1 + "", am) my_verb(mtcars, cyl, c(am, vs)) }) }) test_that("`err_locs()` works as expected", { expect_snapshot(error = TRUE, err_locs(1.5)) expect_snapshot(error = TRUE, err_locs(integer())) expect_snapshot({ err_locs(1L) err_locs(1:5) err_locs(1:6) err_locs(1:7) }) }) test_that("errors during dots collection are not enriched (#6178)", { expect_snapshot(error = TRUE, { mutate(mtcars, !!foobarbaz()) transmute(mtcars, !!foobarbaz()) select(mtcars, !!foobarbaz()) arrange(mtcars, !!foobarbaz()) filter(mtcars, !!foobarbaz()) }) }) test_that("warnings are collected for `last_dplyr_warnings()`", { skip_if_not_installed("base", "3.6.0") local_options( rlang_trace_format_srcrefs = FALSE ) df <- tibble(id = 1:2) f <- function() { warning("msg") 1 } reset_dplyr_warnings() expect_snapshot({ "Ungrouped" df %>% mutate(x = f()) %>% invisible() last_dplyr_warnings() }) reset_dplyr_warnings() expect_snapshot({ "Grouped" df %>% group_by(id) %>% mutate(x = f()) %>% invisible() last_dplyr_warnings() }) reset_dplyr_warnings() expect_snapshot({ "Rowwise" df %>% rowwise() %>% mutate(x = f()) %>% invisible() last_dplyr_warnings() }) reset_dplyr_warnings() expect_snapshot({ "Multiple type of warnings within multiple verbs" df %>% group_by(g = f():n()) %>% rowwise() %>% mutate(x = f()) %>% group_by(id) %>% mutate(x = f()) %>% invisible() last_dplyr_warnings() }) reset_dplyr_warnings() expect_snapshot({ "Truncated (1 more)" df %>% rowwise() %>% mutate(x = f()) last_dplyr_warnings(n = 1) }) reset_dplyr_warnings() expect_snapshot({ "Truncated (several more)" df <- tibble(id = 1:5) df %>% rowwise() %>% mutate(x = f()) last_dplyr_warnings(n = 1) }) }) test_that("complex backtraces with base and rlang warnings", { skip_if_not_installed("base", "3.6.0") local_options( rlang_trace_format_srcrefs = FALSE ) reset_dplyr_warnings() df <- tibble(id = 1:3) f <- function(...) g(...) g <- function(...) h(...) 
h <- function(x, base = TRUE) { if (base) { warning("foo") } else { warn("foo") } x } foo <- function() bar() bar <- function() { df %>% group_by(x = f(1):n()) %>% mutate(x = f(1, base = FALSE)) } expect_snapshot({ foo() last_dplyr_warnings() }) }) test_that("`last_dplyr_warnings()` only records 5 backtraces", { reset_dplyr_warnings() f <- function() { warning("msg") 1 } df <- tibble(id = 1:10) expect_warning( df %>% group_by(id) %>% mutate(x = f()) ) warnings <- last_dplyr_warnings(Inf) traces <- map(warnings, `[[`, "trace") expect_equal( sum(map_lgl(traces, is_null)), 5 ) }) test_that("can collect warnings in main verbs", { reset_dplyr_warnings() f <- function() { warning("foo") TRUE } expect_snapshot({ invisible( mtcars %>% rowwise() %>% filter(f()) %>% arrange(f()) %>% mutate(a = f()) %>% summarise(b = f()) ) warnings <- last_dplyr_warnings(Inf) warnings[[1]] # filter() warnings[[33]] # arrange() warnings[[65]] # mutate() warnings[[97]] # summarise() }) }) dplyr/tests/testthat/helper-dplyr.R0000644000176200001440000000025214366556340017153 0ustar liggesusersexpect_no_error <- function(object, ...) { expect_error({{ object }}, NA, ...) } expect_no_warning <- function(object, ...) { expect_warning({{ object }}, NA, ...) 
} dplyr/tests/testthat/helper-torture.R0000644000176200001440000000005413663216626017524 0ustar liggesuserswith_gctorture2 <- withr::with_(gctorture2) dplyr/tests/testthat/test-pick.R0000644000176200001440000004035414406402754016450 0ustar liggesusers# ------------------------------------------------------------------------------ # pick() + mutate() test_that("can pick columns from the data", { df <- tibble(x1 = 1, y = 2, x2 = 3, z = 4) expect <- df[c("z", "x1", "x2")] out <- mutate(df, sel = pick(z, starts_with("x"))) expect_identical(out$sel, expect) out <- mutate(df, sel = pick_wrapper(z, starts_with("x"))) expect_identical(out$sel, expect) }) test_that("can use namespaced call to `pick()`", { df <- tibble(x = 1, y = "y") expect_identical( mutate(df, z = dplyr::pick(where(is.character))), mutate(df, z = pick(where(is.character))) ) }) test_that("returns separate data frames for each group", { fn <- function(x) { x[["x"]] + mean(x[["z"]]) } df <- tibble(g = c(1, 1, 2, 2, 2), x = 1:5, z = 11:15) gdf <- group_by(df, g) expect <- mutate(gdf, res = x + mean(z)) out <- mutate(gdf, res = fn(pick(x, z))) expect_identical(out, expect) out <- mutate(gdf, res = fn(pick_wrapper(x, z))) expect_identical(out, expect) }) test_that("returns a tibble", { df <- data.frame(x = 1) out <- mutate(df, y = pick(x)) expect_s3_class(out$y, "tbl_df") out <- mutate(df, y = pick_wrapper(x)) expect_s3_class(out$y, "tbl_df") }) test_that("with `rowwise()` data, leaves list-cols unwrapped (#5951, #6264)", { # Because this most closely mimics macro expansion of: # pick(x) -> tibble(x = x) df <- tibble(x = list(1, 2:3, 4:5), y = 1:3) rdf <- rowwise(df) expect_snapshot(error = TRUE, { mutate(rdf, z = pick(x, y)) }) expect_snapshot(error = TRUE, { mutate(rdf, z = pick_wrapper(x, y)) }) }) test_that("selectors won't select grouping columns", { df <- tibble(g = 1, x = 2) gdf <- group_by(df, g) out <- mutate(gdf, y = pick(everything())) expect_named(out$y, "x") out <- mutate(gdf, y = 
pick_wrapper(everything())) expect_named(out$y, "x") }) test_that("selectors won't select rowwise 'grouping' columns", { df <- tibble(g = 1, x = 2) rdf <- rowwise(df, g) out <- mutate(rdf, y = pick(everything())) expect_named(out$y, "x") out <- mutate(rdf, y = pick_wrapper(everything())) expect_named(out$y, "x") }) test_that("can't explicitly select grouping columns (#5460)", { # Related to removing the mask layer from the quosure environments df <- tibble(g = 1, x = 2) gdf <- group_by(df, g) expect_snapshot(error = TRUE, { mutate(gdf, y = pick(g)) }) expect_snapshot(error = TRUE, { mutate(gdf, y = pick_wrapper(g)) }) }) test_that("`all_of()` is evaluated in the correct environment (#5460)", { # Related to removing the mask layer from the quosure environments df <- tibble(g = 1, x = 2, y = 3) # We expect an "object not found" error, but we don't control that # so we aren't going to snapshot it, especially since the call reported # by those kinds of errors changed in R 4.3. expect_error(mutate(df, z = pick(all_of(y)))) expect_error(mutate(df, z = pick_wrapper(all_of(y)))) y <- "x" expect <- df["x"] out <- mutate(df, z = pick(all_of(y))) expect_identical(out$z, expect) out <- mutate(df, z = pick_wrapper(all_of(y))) expect_identical(out$z, expect) }) test_that("empty selections create 1 row tibbles (#6685)", { # This makes the result recyclable against other inputs, and ensures that # a `pick(NULL)` call can be used in a `group_by()` wrapper to # "group by nothing". It is a slight departure from viewing `pick()` as a # pure macro expansion into `tibble()`. 
Instead it is more like an expansion # into: # size <- vctrs::vec_size_common(..., .absent = 1L) # out <- vctrs::vec_recycle_common(..., .size = size) # tibble::new_tibble(out, nrow = size) df <- tibble(g = c(1, 1, 2), x = c(2, 3, 4)) gdf <- group_by(df, g) out <- mutate(gdf, y = pick(starts_with("foo"))) expect_identical(out$y, new_tibble(list(), nrow = 3L)) out <- mutate(gdf, y = pick_wrapper(starts_with("foo"))) expect_identical(out$y, new_tibble(list(), nrow = 3L)) }) test_that("must supply at least one selector to `pick()`", { df <- tibble(x = c(2, 3, 4)) expect_snapshot(error = TRUE, { mutate(df, y = pick()) }) expect_snapshot(error = TRUE, { mutate(df, y = pick_wrapper()) }) }) test_that("the tidyselection and column extraction are evaluated on the current data", { # Because `pick()` is viewed as macro expansion, and the expansion inherits # typical dplyr semantics df <- tibble(g = c(1, 2, 2), x = 1:3) gdf <- group_by(df, g) expect_snapshot(error = TRUE, { # Expands to `tibble(x = x)` mutate(gdf, x = NULL, y = pick(x)) }) expect_snapshot(error = TRUE, { # Does actual `eval_select()` call per group mutate(gdf, x = NULL, y = pick_wrapper(x)) }) # Can select newly created columns out <- mutate(gdf, y = x + 1L, z = pick(x, y)) expect_identical(out[c("x", "y")], out$z) out <- mutate(gdf, y = x + 1L, z = pick_wrapper(x, y)) expect_identical(out[c("x", "y")], out$z) df <- tibble(x = 1) expect <- tibble(x = tibble(x = tibble(x = 1)), y = tibble(x = x)) out <- mutate(df, x = pick(x), x = pick(x), y = pick(x)) expect_identical(out, expect) out <- mutate(df, x = pick_wrapper(x), x = pick_wrapper(x), y = pick_wrapper(x)) expect_identical(out, expect) }) test_that("can call different `pick()` expressions in different groups", { df <- tibble(g = c(1, 2), x = 1:2, y = 3:4) gdf <- group_by(df, g) expect <- tibble(x = c(1L, NA), y = c(NA, 4L)) out <- mutate(gdf, z = if (g == 1) pick(x) else pick(y)) expect_identical(out$z, expect) out <- mutate(gdf, z = if (g == 1) 
pick_wrapper(x) else pick_wrapper(y)) expect_identical(out$z, expect) }) test_that("can call `pick()` from a user defined function", { df <- tibble(a = 1, b = 2, c = 3) gdf <- group_by(df, a) # Hardcoded variables in expression my_pick <- function() pick(a, c) out <- mutate(df, d = my_pick()) expect_identical(out$d, df[c("a", "c")]) # Hardcoded `all_of()` using a local variable my_pick <- function() { x <- c("a", "c") pick(all_of(x)) } out <- mutate(df, d = my_pick()) expect_identical(out$d, df[c("a", "c")]) expect_snapshot(error = TRUE, { mutate(gdf, d = my_pick()) }) # Dynamic `all_of()` using user supplied variable my_pick <- function(x) { pick(all_of(x)) } y <- c("a", "c") out <- mutate(df, d = my_pick(y)) expect_identical(out$d, df[c("a", "c")]) expect_snapshot(error = TRUE, { mutate(gdf, d = my_pick(y)) }) }) test_that("wrapped `all_of()` and `where()` selections work", { df <- tibble(a = 1, b = "x", c = 3) my_pick <- function(x) { pick(all_of(x)) } out <- mutate(df, x = my_pick("a"), y = my_pick("b")) expect_identical(out$x, df["a"]) expect_identical(out$y, df["b"]) my_pick2 <- function(x) { pick(all_of(x)) } out <- mutate(df, x = my_pick("a"), y = my_pick2("b")) expect_identical(out$x, df["a"]) expect_identical(out$y, df["b"]) my_where <- function(fn) { pick(where(fn)) } out <- mutate(df, x = my_where(is.numeric), y = my_where(is.character)) expect_identical(out$x, df[c("a", "c")]) expect_identical(out$y, df["b"]) }) test_that("`pick()` expansion evaluates on the full data", { # To ensure tidyselection is consistent across groups df <- tibble(g = c(1, 1, 2, 2), x = c(0, 0, 1, 1), y = c(1, 1, 0, 0)) gdf <- group_by(df, g) # Doesn't select any columns. Returns a 1 row tibble per group (#6685). out <- mutate(gdf, y = pick(where(~all(.x == 0)))) expect_identical(out$y, new_tibble(list(), nrow = 4L)) # `pick()` evaluation fallback evaluates on the group specific data, # forcing potentially different results per group. 
out <- mutate(gdf, z = pick_wrapper(where(~all(.x == 0)))) expect_named(out$z, c("x", "y")) expect_identical(out$z$x, c(0, 0, NA, NA)) expect_identical(out$z$y, c(NA, NA, 0, 0)) }) test_that("`pick()` expansion/tidyselection happens outside the data mask", { # `pick()` expressions are evaluated in the caller environment of the verb. # This is intentional to avoid theoretical per-group differences in what # `pick()` should return. df <- tibble(x = 1, y = 2, z = 3) a <- "z" expect <- df["z"] out <- mutate(df, foo = { a <- "x" pick(all_of(a)) }) expect_identical(out$foo, expect) # `pick()`'s evaluation fallback also performs the tidy-selection # in the calling environment of the verb out <- mutate(df, foo = { a <- "x" pick_wrapper(all_of(a)) }) expect_identical(out$foo, expect) }) test_that("errors correctly outside mutate context", { expect_snapshot(error = TRUE, { pick() }) expect_snapshot(error = TRUE, { pick(a, b) }) }) test_that("can assign `pick()` to new function", { # Will run the evaluation version of `pick()` pick2 <- pick df <- tibble(x = 1, y = 2) out <- mutate(df, z = pick2(y)) expect_identical(out$z, df["y"]) }) test_that("selection on rowwise data frames uses full list-cols, but actual evaluation unwraps them", { df <- tibble(x = list(1:2, 2:4, 5)) df <- rowwise(df) # i.e. can select based on list-ness of the column. # Expands to `y = list(tibble(x = x))` where `x` is `1:2`, `2:4`, `5` like it # would be if you called that directly. 
out <- mutate(df, y = list(pick(where(is.list)))) expect_identical(out$y, map(df$x, ~tibble(x = .x))) }) test_that("when expansion occurs, error labels use the pre-expansion quosure", { df <- tibble(g = c(1, 2, 2), x = c(1, 2, 3)) # Fails in common type casting of the group chunks, # which references the auto-named column name expect_snapshot(error = TRUE, { mutate(df, if (cur_group_id() == 1L) pick(x) else "x", .by = g) }) }) test_that("doesn't allow renaming", { expect_snapshot(error = TRUE, { mutate(data.frame(x = 1), pick(y = x)) }) expect_snapshot(error = TRUE, { mutate(data.frame(x = 1), pick_wrapper(y = x)) }) }) # ------------------------------------------------------------------------------ # pick() + summarise()/reframe() test_that("can `pick()` inside `reframe()`", { df <- tibble(g = c(1, 1, 2, 1, 2), x = c(1, 1, 1, 2, 2), y = c(1, 1, 1, 2, 1)) gdf <- group_by(df, g) expect_key <- df[c(1, 4, 5), c("x", "y")] expect_count <- c(3L, 1L, 1L) out <- reframe(df, vec_count(pick(x, y), sort = "count")) expect_identical(out$key, expect_key) expect_identical(out$count, expect_count) out <- reframe(df, vec_count(pick_wrapper(x, y), sort = "count")) expect_identical(out$key, expect_key) expect_identical(out$count, expect_count) expect_key <- df[c(1, 4, 3, 5), c("x", "y")] expect_count <- c(2L, 1L, 1L, 1L) out <- reframe(gdf, vec_count(pick(x, y), sort = "count")) expect_identical(out$key, expect_key) expect_identical(out$count, expect_count) out <- reframe(gdf, vec_count(pick_wrapper(x, y), sort = "count")) expect_identical(out$key, expect_key) expect_identical(out$count, expect_count) }) test_that("empty selections recycle to the size of any other column", { df <- tibble(x = 1:5) # Returns size 1 tibbles that stay the same size (#6685) out <- summarise(df, sum = sum(x), y = pick(starts_with("foo"))) expect_identical(out$sum, 15L) expect_identical(out$y, new_tibble(list(), nrow = 1L)) out <- summarise(df, sum = sum(x), y = pick_wrapper(starts_with("foo"))) 
expect_identical(out$sum, 15L) expect_identical(out$y, new_tibble(list(), nrow = 1L)) # Returns size 1 tibbles that recycle to size 0 because of `empty` (#6685) out <- reframe(df, empty = integer(), y = pick(starts_with("foo"))) expect_identical(out$empty, integer()) expect_identical(out$y, new_tibble(list(), nrow = 0L)) out <- reframe(df, empty = integer(), y = pick_wrapper(starts_with("foo"))) expect_identical(out$empty, integer()) expect_identical(out$y, new_tibble(list(), nrow = 0L)) }) test_that("uses 'current' columns of `summarize()` and `reframe()`", { df <- tibble(x = 1:5, y = 6:10) # Uses size of current version of `x` expect_x <- 15L expect_z <- tibble(x = 15L) out <- summarise(df, x = sum(x), z = pick(x)) expect_identical(out$x, expect_x) expect_identical(out$z, expect_z) out <- summarise(df, x = sum(x), z = pick_wrapper(x)) expect_identical(out$x, expect_x) expect_identical(out$z, expect_z) # Adding in `y` forces recycling expect_x <- vec_rep(15L, 5) expect_z <- tibble(x = 15L, y = 6:10) out <- reframe(df, x = sum(x), z = pick(x, y)) expect_identical(out$x, expect_x) expect_identical(out$z, expect_z) out <- reframe(df, x = sum(x), z = pick_wrapper(x, y)) expect_identical(out$x, expect_x) expect_identical(out$z, expect_z) }) test_that("can select completely new columns in `summarise()`", { df <- tibble(x = 1:5) out <- mutate(df, y = x + 1, z = pick(y)) expect_identical(out["y"], out$z) out <- mutate(df, y = x + 1, z = pick_wrapper(y)) expect_identical(out["y"], out$z) }) # ------------------------------------------------------------------------------ # pick() + arrange() test_that("can `arrange()` with `pick()` selection", { df <- tibble(x = c(2, 2, 1), y = c(3, 1, 3)) expect <- df[c(3, 2, 1),] expect_identical(arrange(df, pick(x, y)), expect) expect_identical(arrange(df, pick_wrapper(x, y)), expect) expect_identical(arrange(df, pick(x), y), expect) expect_identical(arrange(df, pick_wrapper(x), y), expect) }) test_that("`pick()` errors in `arrange()` 
are useful", { df <- tibble(x = 1) expect_snapshot(error = TRUE, { arrange(df, pick(y)) }) expect_snapshot(error = TRUE, { arrange(df, foo(pick(x))) }) }) # ------------------------------------------------------------------------------ # pick() + filter() test_that("can `pick()` inside `filter()`", { df <- tibble(x = c(1, 2, NA, 3), y = c(2, NA, 5, 3)) out <- filter(df, vec_detect_complete(pick(x, y))) expect_identical(out, df[c(1, 4),]) out <- filter(df, vec_detect_complete(pick_wrapper(x, y))) expect_identical(out, df[c(1, 4),]) }) test_that("`filter()` with `pick()` that uses invalid tidy-selection errors", { df <- tibble(x = c(1, 2, NA, 3), y = c(2, NA, 5, 3)) expect_snapshot(error = TRUE, { filter(df, pick(x, a)) }) expect_snapshot(error = TRUE, { filter(df, pick_wrapper(x, a)) }) }) test_that("`filter()` that doesn't use `pick()` result correctly errors", { df <- tibble(x = c(1, 2, NA, 3), y = c(2, NA, 5, 3)) # TODO: Can we improve on the `In argument:` expression in the expansion case? 
expect_snapshot(error = TRUE, { filter(df, pick(x, y)$x) }) expect_snapshot(error = TRUE, { filter(df, pick_wrapper(x, y)$x) }) }) # ------------------------------------------------------------------------------ # pick() + group_by() test_that("`pick()` can be used inside `group_by()` wrappers", { df <- tibble(a = 1:3, b = 2:4, c = 3:5) tidyselect_group_by <- function(data, groups) { group_by(data, pick({{ groups }})) } tidyselect_group_by_wrapper <- function(data, groups) { group_by(data, pick_wrapper({{ groups }})) } expect_identical( tidyselect_group_by(df, c(a, c)), group_by(df, a, c) ) expect_identical( tidyselect_group_by_wrapper(df, c(a, c)), group_by(df, a, c) ) # Empty selections group by nothing (#6685) expect_identical( tidyselect_group_by(df, NULL), df ) expect_identical( tidyselect_group_by_wrapper(df, NULL), df ) }) # ------------------------------------------------------------------------------ # expand_pick() test_that("`pick()` doesn't expand across anonymous function boundaries", { df <- tibble(x = 1, y = 2) by <- compute_by(by = NULL, data = df, error_call = current_env()) mask <- DataMask$new(df, by, verb = "mutate", error_call = current_env()) # With inline `function() { }` calls (this also handles native R anonymous functions) quo <- dplyr_quosures(z = function() pick(y, x))$z expect_identical(expand_pick(quo, mask), quo) # With `~` anonymous functions quos <- dplyr_quosures(z = ~ pick(y, x))$z expect_identical(expand_pick(quo, mask), quo) }) test_that("`pick()` expands embedded quosures", { df <- tibble(x = 1, y = 2) by <- compute_by(by = NULL, data = df, error_call = current_env()) mask <- DataMask$new(df, by, verb = "mutate", error_call = current_env()) wrapper <- function(x) { dplyr_quosures(z = dense_rank({{x}})) } quo <- wrapper(pick(x, y))$z out <- expand_pick(quo, mask) expect_identical( quo_get_expr(quo_get_expr(out)[[2L]]), expr(asNamespace("dplyr")$dplyr_pick_tibble(x = x, y = y)) ) }) 
dplyr/tests/testthat/test-colwise-arrange.R0000644000176200001440000000415314366556340020607 0ustar liggesusersdf <- mtcars[1:3] test_that("scoped arrange is identical to manual arrange", { expect_identical(arrange_all(df), arrange(df, mpg, cyl, disp)) expect_identical(arrange_at(df, vars(mpg)), arrange(df, mpg)) expect_identical(arrange_if(iris, is.factor), arrange(iris, Species)) }) test_that(".funs is applied to variables before sorting", { expect_identical(arrange_all(df, `-`), arrange(df, -mpg, -cyl, -disp)) }) test_that("arrange_at can arrange by grouping variables (#3351, #3332, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( arrange_at(tbl, vars(gr1)), arrange(tbl, gr1) ) expect_identical( arrange_at(tbl, vars(-x)), arrange(tbl, gr1, gr2) ) }) test_that("arrange_all arranges by grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( arrange_all(tbl), arrange(tbl, gr1, gr2, x) ) expect_identical( arrange_all(tbl, desc), arrange(tbl, desc(gr1), desc(gr2), desc(x)) ) }) test_that("arrange_if arranges by grouping variable (#3351, #3480)", { tbl <- tibble(gr1 = rep(1:2, 4), gr2 = rep(1:2, each = 4), x = 1:8) %>% group_by(gr1) expect_identical( arrange_if(tbl, is.integer), arrange(tbl, gr1, gr2, x) ) }) test_that("scoped arrange respect .by_group (#3245)",{ d <- group_by(df, cyl) expect_identical( arrange_all(d, .by_group = TRUE), arrange(d, cyl, mpg, disp) ) expect_identical( arrange_if(d, is.numeric, .by_group = TRUE), arrange(d, cyl, mpg, disp) ) expect_identical( arrange_at(d, vars(mpg, disp), .by_group = TRUE), arrange(d, cyl, mpg, disp) ) }) test_that("scoped `arrange()` respects `.locale`", { df <- tibble(x = c("A", "a", "b", "B")) expect_identical( arrange_all(df, .locale = "C"), arrange(df, x, .locale = "C") ) expect_identical( arrange_if(df, is.character, .locale = "C"), arrange(df, x, .locale = "C") ) 
expect_identical( arrange_at(df, vars(x), .locale = "C"), arrange(df, x, .locale = "C") ) }) dplyr/tests/testthat.R0000644000176200001440000000006613663216626014546 0ustar liggesuserslibrary(testthat) library(dplyr) test_check("dplyr") dplyr/src/0000755000176200001440000000000014525507110012173 5ustar liggesusersdplyr/src/group_by.cpp0000644000176200001440000002506114366556340014545 0ustar liggesusers#include "dplyr.h" #include #include // support for expand_groups() class ExpanderCollecter; struct ExpanderResult { ExpanderResult(R_xlen_t start_, R_xlen_t end_, R_xlen_t index_) : start(start_), end(end_), index(index_) {} R_xlen_t start; R_xlen_t end; R_xlen_t index; inline R_xlen_t size() const { return end - start; } }; class Expander { public: virtual ~Expander() {}; virtual R_xlen_t size() const = 0; virtual ExpanderResult collect(ExpanderCollecter& results, int depth) const = 0; }; class ExpanderCollecter { public: ExpanderCollecter(int nvars_, SEXP new_indices_, int new_size_, SEXP new_rows_, SEXP old_rows_) : nvars(nvars_), old_rows(old_rows_), new_size(new_size_), new_indices(new_indices_), new_rows(new_rows_), leaf_index(0), vec_new_indices(nvars) { Rf_classgets(new_rows, dplyr::vectors::classes_vctrs_list_of); Rf_setAttrib(new_rows, dplyr::symbols::ptype, dplyr::vectors::empty_int_vector); for (int i = 0; i < nvars; i++) { SEXP new_indices_i = Rf_allocVector(INTSXP, new_size); SET_VECTOR_ELT(new_indices, i, new_indices_i); vec_new_indices[i] = INTEGER(new_indices_i); } } ExpanderResult collect_leaf(R_xlen_t start, R_xlen_t end, R_xlen_t index) { if (start == end) { SET_VECTOR_ELT(new_rows, leaf_index++, dplyr::vectors::empty_int_vector); } else { SET_VECTOR_ELT(new_rows, leaf_index++, VECTOR_ELT(old_rows, start)); } return ExpanderResult(leaf_index - 1, leaf_index, index); } ExpanderResult collect_node(int depth, R_xlen_t index, const std::vector& expanders) { int n = expanders.size(); if (n == 0) { return ExpanderResult(NA_INTEGER, NA_INTEGER, index); } 
ExpanderResult first = expanders[0]->collect(*this, depth + 1); R_xlen_t start = first.start; R_xlen_t end = first.end; fill_indices(depth, start, end, first.index); for (R_xlen_t i = 1; i < n; i++) { ExpanderResult exp_i = expanders[i]->collect(*this, depth + 1); fill_indices(depth, exp_i.start, exp_i.end, exp_i.index); end = exp_i.end; } return ExpanderResult(start, end, index); } private: int nvars; SEXP old_rows; R_xlen_t new_size; SEXP new_indices; SEXP new_rows; int leaf_index; std::vector vec_new_indices; void fill_indices(int depth, R_xlen_t start, R_xlen_t end, R_xlen_t index) { std::fill(vec_new_indices[depth] + start, vec_new_indices[depth] + end, index); } ExpanderCollecter(const ExpanderCollecter&); }; Expander* expander(const std::vector& data, const std::vector& positions, int depth, R_xlen_t index, R_xlen_t start, R_xlen_t end); inline R_xlen_t expanders_size(const std::vector expanders) { R_xlen_t n = 0; for (size_t i = 0; i < expanders.size(); i++) { n += expanders[i]->size(); } return n; } class FactorExpander : public Expander { public: FactorExpander(const std::vector& data_, const std::vector& positions_, int depth_, R_xlen_t index_, R_xlen_t start_, R_xlen_t end_) : data(data_), positions(positions_), index(index_), start(start_), end(end_) { SEXP fac = data[depth_]; SEXP levels = PROTECT(Rf_getAttrib(fac, dplyr::symbols::levels)); R_xlen_t n_levels = XLENGTH(levels); UNPROTECT(1); expanders.resize(n_levels); int* fac_pos = positions[depth_]; // for each level, setup an expander for `depth + 1` R_xlen_t j = start; for (R_xlen_t i = 0; i < n_levels; i++) { R_xlen_t start_i = j; while (j < end && fac_pos[j] == i + 1) j++; expanders[i] = expander(data, positions, depth_ + 1, i + 1, start_i, j); } // implicit NA if (j < end) { expanders.push_back(expander(data, positions, depth_ + 1, NA_INTEGER, j, end)); } } ~FactorExpander() { for (int i = expanders.size() - 1; i >= 0; i--) delete expanders[i]; } virtual R_xlen_t size() const { return 
expanders_size(expanders); } ExpanderResult collect(ExpanderCollecter& results, int depth) const { return results.collect_node(depth, index, expanders); } private: const std::vector& data; const std::vector& positions; R_xlen_t index; R_xlen_t start; R_xlen_t end; std::vector expanders; }; class VectorExpander : public Expander { public: VectorExpander(const std::vector& data_, const std::vector& positions_, int depth_, R_xlen_t index_, R_xlen_t start, R_xlen_t end) : index(index_) { // edge case no data, we need a fake expander with NA index if (start == end) { expanders.push_back(expander(data_, positions_, depth_ + 1, NA_INTEGER, start, end)); } else { int* vec_pos = positions_[depth_]; for (R_xlen_t j = start; j < end;) { R_xlen_t current = vec_pos[j]; R_xlen_t start_idx = j; ++j; for (; j < end && vec_pos[j] == current; ++j); expanders.push_back(expander(data_, positions_, depth_ + 1, current, start_idx, j)); } } } ~VectorExpander() { for (int i = expanders.size() - 1; i >= 0; i--) delete expanders[i]; } virtual R_xlen_t size() const { return expanders_size(expanders); } ExpanderResult collect(ExpanderCollecter& results, int depth) const { return results.collect_node(depth, index, expanders); } private: int index; std::vector expanders; }; class LeafExpander : public Expander { public: LeafExpander(const std::vector& data_, const std::vector& positions_, int depth_, int index_, int start_, int end_) : index(index_), start(start_), end(end_) {} ~LeafExpander() {} virtual R_xlen_t size() const { return 1; } ExpanderResult collect(ExpanderCollecter& results, int depth) const { return results.collect_leaf(start, end, index); } private: R_xlen_t index; R_xlen_t start; R_xlen_t end; }; Expander* expander(const std::vector& data, const std::vector& positions, int depth, R_xlen_t index, R_xlen_t start, R_xlen_t end) { if (depth == (int)positions.size()) { return new LeafExpander(data, positions, depth, index, start, end); } else if (Rf_isFactor(data[depth])) { return 
new FactorExpander(data, positions, depth, index, start, end);
  } else {
    return new VectorExpander(data, positions, depth, index, start, end);
  }
}

// Expand `old_groups` (a group-data data frame whose last column is `.rows`)
// into the cross product implied by the per-column `positions`, returning a
// 2-element named list: new key indices per variable, and new row lists.
// `s_nr` is the scalar integer number of rows of the old grouping.
SEXP dplyr_expand_groups(SEXP old_groups, SEXP positions, SEXP s_nr) {
  int nr = INTEGER(s_nr)[0];
  // last column of `old_groups` is `.rows`; the first `nvars` are key columns
  R_xlen_t nvars = XLENGTH(old_groups) - 1;
  SEXP old_rows = VECTOR_ELT(old_groups, nvars);

  // NOTE(review): element types of these vectors were stripped from this
  // rendering (presumably std::vector<SEXP> / std::vector<int*>).
  std::vector vec_data(nvars);
  std::vector vec_positions(nvars);
  for (R_xlen_t i = 0; i < nvars; i++) {
    vec_data[i] = VECTOR_ELT(old_groups, i);
    vec_positions[i] = INTEGER(VECTOR_ELT(positions, i));
  }

  // Build the expander tree over all rows, then collect it into the output.
  Expander* exp = expander(vec_data, vec_positions, 0, NA_INTEGER, 0, nr);
  SEXP new_indices = PROTECT(Rf_allocVector(VECSXP, nvars));
  SEXP new_rows = PROTECT(Rf_allocVector(VECSXP, exp->size()));
  ExpanderCollecter results(nvars, new_indices, exp->size(), new_rows, old_rows);
  exp->collect(results, 0);

  SEXP out = PROTECT(Rf_allocVector(VECSXP, 2));
  SET_VECTOR_ELT(out, 0, new_indices);
  SET_VECTOR_ELT(out, 1, new_rows);
  delete exp;
  Rf_namesgets(out, dplyr::vectors::names_expanded);
  UNPROTECT(3);
  return out;
}

// Validate the structure of a grouped data frame. Returns R_NilValue when
// valid, otherwise a character vector describing the first problem found
// (this function reports rather than throws). `s_check_bounds` toggles the
// more expensive check that all row indices are in [1, nrow(df)].
SEXP dplyr_validate_grouped_df(SEXP df, SEXP s_check_bounds) {
  if (!Rf_inherits(df, "grouped_df")) {
    return Rf_mkString("Not a `grouped_df` object.");
  }

  SEXP groups = PROTECT(Rf_getAttrib(df, dplyr::symbols::groups));
  if (!Rf_inherits(groups, "data.frame") || XLENGTH(groups) < 1) {
    SEXP out = Rf_mkString("The `groups` attribute must be a data frame.");
    UNPROTECT(1);
    return out;
  }

  // The last column must be named `.rows` (strcmp() == 0 means equal).
  SEXP groups_names = PROTECT(Rf_getAttrib(groups, R_NamesSymbol));
  if (Rf_isNull(groups_names) || TYPEOF(groups_names) != STRSXP ||
      ::strcmp(CHAR(STRING_ELT(groups_names, XLENGTH(groups_names) - 1)), ".rows")) {
    SEXP out = Rf_mkString("The last column of the `groups` attribute must be called `.rows`.");
    UNPROTECT(2);
    return out;
  }

  SEXP dot_rows = VECTOR_ELT(groups, XLENGTH(groups) - 1);
  if (TYPEOF(dot_rows) != VECSXP) {
    SEXP out = Rf_mkString("The `.rows` column must be list of one-based integer vectors.");
    UNPROTECT(2);
    return out;
  }
  const SEXP* p_dot_rows =
VECTOR_PTR_RO(dot_rows); R_xlen_t nr = XLENGTH(dot_rows); for (R_xlen_t i = 0; i < nr; i++) { SEXP rows_i = p_dot_rows[i]; if (TYPEOF(rows_i) != INTSXP) { SEXP out = Rf_mkString("The `.rows` column must be list of one-based integer vectors."); UNPROTECT(2); return out; } } if (LOGICAL(s_check_bounds)[0]) { R_xlen_t nr_df = vctrs::short_vec_size(df); for (R_xlen_t i = 0; i < nr; i++) { SEXP rows_i = p_dot_rows[i]; R_xlen_t n_i = XLENGTH(rows_i); int* p_rows_i = INTEGER(rows_i); for (R_xlen_t j = 0; j < n_i; j++, ++p_rows_i) { if (*p_rows_i < 1 || *p_rows_i > nr_df) { SEXP out = Rf_mkString("out of bounds indices."); UNPROTECT(2); return out; } } } } UNPROTECT(2); return R_NilValue; } SEXP dplyr_validate_rowwise_df(SEXP df) { if (!Rf_inherits(df, "rowwise_df")) { return Rf_mkString("Not a `rowwise_df` object."); } SEXP groups = PROTECT(Rf_getAttrib(df, dplyr::symbols::groups)); if (!Rf_inherits(groups, "data.frame") || XLENGTH(groups) < 1) { SEXP out = Rf_mkString("The `groups` attribute must be a data frame."); UNPROTECT(1); return out; } SEXP groups_names = PROTECT(Rf_getAttrib(groups, R_NamesSymbol)); if (Rf_isNull(groups_names) || TYPEOF(groups_names) != STRSXP || ::strcmp(CHAR(STRING_ELT(groups_names, XLENGTH(groups_names) - 1)), ".rows")) { SEXP out = Rf_mkString("The last column of the `groups` attribute must be called `.rows`."); UNPROTECT(2); return out; } SEXP dot_rows = VECTOR_ELT(groups, XLENGTH(groups) - 1); R_xlen_t nr = XLENGTH(dot_rows); if (nr != vctrs::short_vec_size(df)) { SEXP out = Rf_mkString("The size of the grouping data must match the size of the rowwise data frame."); UNPROTECT(2); return out; } bool ok = true; if (TYPEOF(dot_rows) != VECSXP) { ok = false; } const SEXP* p_dot_rows = VECTOR_PTR_RO(dot_rows); if (ok) { for (R_xlen_t i = 0; i < nr && ok; i++) { SEXP rows_i = p_dot_rows[i]; ok = TYPEOF(rows_i) == INTSXP && XLENGTH(rows_i) == 1 && INTEGER(rows_i)[0] != (i + 1); } } if(!ok) { SEXP out = Rf_mkString("The `.rows` column must be a 
list of size 1, one-based integer vectors with the right value.");
    UNPROTECT(2);
    return out;
  }

  UNPROTECT(2);
  return R_NilValue;
}
dplyr/src/summarise.cpp0000644000176200001440000001306714406402754014721 0ustar liggesusers#include "dplyr.h"

namespace dplyr {

// Throw a classed R error `dplyr:::summarise_unsupported_type`,
// carrying the offending result as error data.
void stop_summarise_unsupported_type(SEXP result) {
  DPLYR_ERROR_INIT(1);
  DPLYR_ERROR_SET(0, "result", result);
  DPLYR_ERROR_THROW("dplyr:::summarise_unsupported_type");
}

// Throw `dplyr:::summarise_mixed_null`: some groups returned NULL and
// others did not.
void stop_summarise_mixed_null() {
  DPLYR_ERROR_INIT(0);
  DPLYR_ERROR_THROW("dplyr:::summarise_mixed_null");
}

// Throw `dplyr:::summarise_incompatible_size` with 1-based group and
// expression indices plus the expected and actual chunk sizes.
void stop_summarise_incompatible_size(int index_group, int index_expression, int expected_size, int size) {
  DPLYR_ERROR_INIT(4);
  DPLYR_ERROR_SET(0, "group", Rf_ScalarInteger(index_group + 1));
  DPLYR_ERROR_SET(1, "index", Rf_ScalarInteger(index_expression + 1));
  DPLYR_ERROR_SET(2, "expected_size", Rf_ScalarInteger(expected_size));
  DPLYR_ERROR_SET(3, "size", Rf_ScalarInteger(size));
  DPLYR_ERROR_THROW("dplyr:::summarise_incompatible_size");
}

}

// Evaluate quosure `quo` once per group for summarise(), returning the list
// of per-group results ("chunks"). Returns R_NilValue when every group
// returned NULL; errors when results mix NULL and non-NULL, or when a result
// is not a vctrs vector.
SEXP dplyr_mask_eval_all_summarise(SEXP quo, SEXP env_private) {
  DPLYR_MASK_INIT();

  R_xlen_t n_null = 0;
  SEXP chunks = PROTECT(Rf_allocVector(VECSXP, ngroups));
  for (R_xlen_t i = 0; i < ngroups; i++) {
    DPLYR_MASK_ITERATION_INIT();
    DPLYR_MASK_SET_GROUP(i);

    SEXP result_i = PROTECT(DPLYR_MASK_EVAL(quo));
    SET_VECTOR_ELT(chunks, i, result_i);
    if (result_i == R_NilValue) {
      n_null++;
    } else if (!vctrs::obj_is_vector(result_i)) {
      dplyr::stop_summarise_unsupported_type(result_i);
    }
    UNPROTECT(1);
    DPLYR_MASK_ITERATION_FINALISE();
  }
  DPLYR_MASK_FINALISE();
  UNPROTECT(1);

  if (n_null == ngroups) {
    return R_NilValue;
  } else if (n_null != 0) {
    const SEXP* v_chunks = VECTOR_PTR_RO(chunks);
    for (R_xlen_t i = 0; i < ngroups; i++) {
      if (v_chunks[i] == R_NilValue) {
        // Find out the first time the group was `NULL`
        // so that the error will be associated with this group
        DPLYR_MASK_SET_GROUP(i);
        dplyr::stop_summarise_mixed_null();
      }
    }
  }

  return chunks;
}

SEXP dplyr_summarise_recycle_chunks_in_place(SEXP list_of_chunks, SEXP
list_of_result) {
  // - `list_of_chunks` will be modified in place by recycling each chunk
  //   to its common size as necessary.
  // - `list_of_result` will be modified in place if any chunks that originally
  //   created the result element were recycled, because the result won't be
  //   the right size anymore.
  // - Returns an integer vector of the common sizes.
  if (TYPEOF(list_of_chunks) != VECSXP) {
    Rf_errorcall(R_NilValue, "Internal error: `list_of_chunks` must be a list.");
  }
  if (TYPEOF(list_of_result) != VECSXP) {
    Rf_errorcall(R_NilValue, "Internal error: `list_of_result` must be a list.");
  }

  const R_xlen_t n_list_of_chunks = Rf_xlength(list_of_chunks);
  const SEXP* v_list_of_chunks = VECTOR_PTR_RO(list_of_chunks);

  if (n_list_of_chunks == 0) {
    // At least one set of chunks is required to proceed
    return dplyr::vectors::empty_int_vector;
  }

  SEXP first_chunks = v_list_of_chunks[0];
  const SEXP* v_first_chunks = VECTOR_PTR_RO(first_chunks);
  const R_xlen_t n_chunks = Rf_xlength(first_chunks);

  // `sizes[j]` holds the common size for group `j` across all expressions.
  SEXP sizes = PROTECT(Rf_allocVector(INTSXP, n_chunks));
  int* v_sizes = INTEGER(sizes);

  // Initialize `sizes` with first set of chunks
  for (R_xlen_t i = 0; i < n_chunks; ++i) {
    v_sizes[i] = vctrs::short_vec_size(v_first_chunks[i]);
  }

  bool any_need_recycling = false;

  // Find common size across sets of chunks.
  // Recycling rule per group: equal sizes are kept; a size-1 chunk recycles
  // to the other size; anything else is an error.
  for (R_xlen_t i = 1; i < n_list_of_chunks; ++i) {
    SEXP chunks = v_list_of_chunks[i];
    const SEXP* v_chunks = VECTOR_PTR_RO(chunks);

    for (R_xlen_t j = 0; j < n_chunks; ++j) {
      SEXP chunk = v_chunks[j];
      const R_xlen_t out_size = v_sizes[j];
      const R_xlen_t elt_size = vctrs::short_vec_size(chunk);

      if (out_size == elt_size) {
        // v_sizes[j] is correct
      } else if (out_size == 1) {
        v_sizes[j] = elt_size;
        any_need_recycling = true;
      } else if (elt_size == 1) {
        // v_sizes[j] is correct
        any_need_recycling = true;
      } else {
        dplyr::stop_summarise_incompatible_size(j, i, out_size, elt_size);
      }
    }
  }

  if (!any_need_recycling) {
    // Fast path: nothing to recycle, return the sizes as-is.
    UNPROTECT(1);
    return sizes;
  }

  // Actually recycle across chunks
  for (R_xlen_t i =
0; i < n_list_of_chunks; ++i) {
    SEXP chunks = v_list_of_chunks[i];
    const SEXP* v_chunks = VECTOR_PTR_RO(chunks);
    bool reset_result = false;

    for (R_xlen_t j = 0; j < n_chunks; ++j) {
      SEXP chunk = v_chunks[j];
      const R_xlen_t out_size = v_sizes[j];
      const R_xlen_t elt_size = vctrs::short_vec_size(chunk);

      if (out_size != elt_size) {
        // Recycle and modify `chunks` in place!
        chunk = vctrs::short_vec_recycle(chunk, out_size);
        SET_VECTOR_ELT(chunks, j, chunk);
        reset_result = true;
      }
    }

    if (reset_result) {
      // `list_of_result[[i]]` was created from `list_of_chunks[[i]]`,
      // but the chunks have been recycled so now the result is out of date.
      // It will be regenerated on the R side from the new chunks.
      SET_VECTOR_ELT(list_of_result, i, R_NilValue);
    }
  }

  UNPROTECT(1);
  return sizes;
}

// Transpose a list of data frames (`df_list`, all with the columns of
// `df_ptype`) into a named list of per-column chunk lists:
// out[[column]][[row]] <- df_list[[row]][[column]].
SEXP dplyr_extract_chunks(SEXP df_list, SEXP df_ptype) {
  R_xlen_t n_columns = XLENGTH(df_ptype);
  R_xlen_t n_rows = XLENGTH(df_list);

  const SEXP* p_df_list = VECTOR_PTR_RO(df_list);

  SEXP out = PROTECT(Rf_allocVector(VECSXP, n_columns));
  for (R_xlen_t i = 0; i < n_columns; i++) {
    SEXP out_i = PROTECT(Rf_allocVector(VECSXP, n_rows));
    for (R_xlen_t j = 0; j < n_rows; j++) {
      SET_VECTOR_ELT(out_i, j, VECTOR_ELT(p_df_list[j], i));
    }
    SET_VECTOR_ELT(out, i, out_i);
    UNPROTECT(1);
  }

  // Column names come from the prototype data frame.
  Rf_namesgets(out, Rf_getAttrib(df_ptype, R_NamesSymbol));
  UNPROTECT(1);
  return out;
}
dplyr/src/group_data.cpp0000644000176200001440000000235314266276767015056 0ustar liggesusers#include "dplyr.h"

// Invert the `rows` list (one integer vector of 1-based row indices per
// group) into a vector of length nrow(data) giving each row's 1-based group
// id. NOTE(review): rows not covered by any group are left uninitialized by
// this loop -- presumably every row belongs to exactly one group; confirm
// with the R-side caller.
SEXP dplyr_group_indices(SEXP data, SEXP rows) {
  R_xlen_t nr = vctrs::short_vec_size(data);
  if (nr == 0) {
    return dplyr::vectors::empty_int_vector;
  }

  SEXP indices = PROTECT(Rf_allocVector(INTSXP, nr));
  int* p_indices = INTEGER(indices);

  R_xlen_t ng = XLENGTH(rows);
  const SEXP* p_rows = VECTOR_PTR_RO(rows);
  for (R_xlen_t i = 0; i < ng; i++) {
    SEXP rows_i = p_rows[i];
    R_xlen_t n_i = XLENGTH(rows_i);
    int* p_rows_i = INTEGER(rows_i);
    for (R_xlen_t j = 0; j < n_i; j++, ++p_rows_i) {
      // convert 1-based row index to 0-based slot; store 1-based group id
      p_indices[*p_rows_i - 1] = i + 1;
    }
  }
  UNPROTECT(1);

  return
indices; }

// Extract the grouping key columns (all but the trailing `.rows` column)
// from `group_data`, preserving their names.
// NOTE(review): a span of text is missing from this rendering below --
// the body of this loop, the end of dplyr_group_keys(), and most of
// dplyr_mask_eval_all_mutate() (from mutate.cpp) were swallowed, leaving
// the fragment `i 0 && !seen_vec` (likely a stripped `<` comparison and
// lost intermediate lines). Recover from the upstream dplyr sources; the
// tokens are preserved as found.
SEXP dplyr_group_keys(SEXP group_data) {
  R_xlen_t n = XLENGTH(group_data) - 1;
  SEXP old_names = PROTECT(Rf_getAttrib(group_data, R_NamesSymbol));
  SEXP new_names = PROTECT(Rf_allocVector(STRSXP, n));
  SEXP keys = PROTECT(Rf_allocVector(VECSXP, n));
  const SEXP* p_old_names = STRING_PTR_RO(old_names);
  for (R_xlen_t i=0; i 0 && !seen_vec) { chunks = R_NilValue; } UNPROTECT(1); DPLYR_MASK_FINALISE(); return chunks; }
dplyr/src/slice.cpp0000644000176200001440000000064114406402754014005 0ustar liggesusers#include "dplyr.h"

// Evaluate quosure `quo` once per group (used by slice()), collecting the
// raw per-group results into a list with no type/size checking.
SEXP dplyr_mask_eval_all(SEXP quo, SEXP env_private) {
  DPLYR_MASK_INIT();
  SEXP chunks = PROTECT(Rf_allocVector(VECSXP, ngroups));
  for (R_xlen_t i = 0; i < ngroups; i++) {
    DPLYR_MASK_ITERATION_INIT();
    DPLYR_MASK_SET_GROUP(i);
    SET_VECTOR_ELT(chunks, i, DPLYR_MASK_EVAL(quo));
    DPLYR_MASK_ITERATION_FINALISE();
  }
  UNPROTECT(1);
  DPLYR_MASK_FINALISE();
  return chunks;
}
dplyr/src/mask.cpp0000644000176200001440000000705614406402754013650 0ustar liggesusers#include "dplyr.h"

// Return CHARSXP `s` re-encoded as UTF-8 unless it is already UTF-8 or
// pure ASCII (checked via the encoding bits in LEVELS).
SEXP as_utf8(SEXP s) {
  if (!IS_UTF8(s) && !IS_ASCII(s)) {
    s = Rf_mkCharCE(Rf_translateCharUTF8(s), CE_UTF8);
  }
  return s;
}

// Linear search of character vector `haystack` for CHARSXP `needle`,
// comparing UTF-8 normalized CHARSXPs by pointer identity (valid because R
// interns CHARSXPs in its string cache). Returns XLENGTH(haystack) when
// not found.
R_xlen_t find_first(SEXP haystack, SEXP needle) {
  SEXP needle_utf8 = PROTECT(as_utf8(needle));

  R_xlen_t n = XLENGTH(haystack);
  R_xlen_t i_name = 0;
  for (; i_name < n; i_name++) {
    if (needle_utf8 == as_utf8(STRING_ELT(haystack, i_name))) break;
  }
  UNPROTECT(1);
  return i_name;
}

// Return a fresh integer vector equal to `ints` with `x` appended.
SEXP integers_append(SEXP ints, int x) {
  R_xlen_t n = XLENGTH(ints);
  SEXP new_ints = PROTECT(Rf_allocVector(INTSXP, n + 1));
  int* p_ints = INTEGER(ints);
  int* p_new_ints = INTEGER(new_ints);
  for (R_xlen_t i = 0; i < n; i++) {
    p_new_ints[i] = p_ints[i];
  }
  p_new_ints[n] = x;
  UNPROTECT(1);
  return new_ints;
}

// Register a new (or updated) column binding in the data mask: record its
// ptype in `private$current_data`, store its chunks in the chops
// environment, and install an active binding for evaluation.
SEXP dplyr_mask_binding_add(SEXP env_private, SEXP s_name, SEXP ptype, SEXP chunks) {
  SEXP name = STRING_ELT(s_name, 0);

  // we assume control over these
  SEXP current_data = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::current_data));
  SEXP current_vars =
PROTECT(Rf_getAttrib(current_data, R_NamesSymbol));

  // search for position of name
  R_xlen_t n = XLENGTH(current_data);
  R_xlen_t i_name = find_first(current_vars, name);

  bool is_new_column = i_name == n;
  if (is_new_column) {
    // Grow `current_data` by one slot: copy names and columns, append the
    // new name with its ptype, and rebind in `env_private`.
    SEXP new_current_vars = PROTECT(Rf_allocVector(STRSXP, n + 1));
    SEXP new_current_data = PROTECT(Rf_allocVector(VECSXP, n + 1));
    for (R_xlen_t i = 0; i < n; i++) {
      SET_STRING_ELT(new_current_vars, i, STRING_ELT(current_vars, i));
      SET_VECTOR_ELT(new_current_data, i, VECTOR_ELT(current_data, i));
    }
    SET_STRING_ELT(new_current_vars, n, name);
    SET_VECTOR_ELT(new_current_data, n, ptype);
    Rf_namesgets(new_current_data, new_current_vars);
    Rf_defineVar(dplyr::symbols::current_data, new_current_data, env_private);
    UNPROTECT(2);
  } else {
    // Existing column: just replace its ptype in place.
    SET_VECTOR_ELT(current_data, i_name, ptype);
  }

  // Bind the chunks under the column's symbol in the chops environment and
  // add a matching active binding for mask evaluation.
  SEXP sym_name = PROTECT(rlang::str_as_symbol(name));
  SEXP chops = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::chops));
  Rf_defineVar(sym_name, chunks, chops);

  SEXP env_mask_bindings = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::env_mask_bindings));
  add_mask_binding(sym_name, env_mask_bindings, chops);

  UNPROTECT(5);
  return R_NilValue;
}

// Remove a column binding from the data mask: drop it from
// `private$current_data` and unbind it from both the mask bindings and
// chops environments. No-op when the name is not present.
SEXP dplyr_mask_binding_remove(SEXP env_private, SEXP s_name) {
  SEXP name = STRING_ELT(s_name, 0);

  SEXP current_data = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::current_data));
  SEXP current_vars = PROTECT(Rf_getAttrib(current_data, R_NamesSymbol));

  // search for position of name
  R_xlen_t n = XLENGTH(current_vars);
  R_xlen_t i_name = find_first(current_vars, name);

  if (i_name != n) {
    // Shrink `current_data` by one slot, skipping the removed column.
    SEXP new_current_data = PROTECT(Rf_allocVector(VECSXP, n - 1));
    SEXP new_current_vars = PROTECT(Rf_allocVector(STRSXP, n - 1));
    for (R_xlen_t i = 0, j = 0; i < n; i++) {
      if (i == i_name) continue;
      SET_STRING_ELT(new_current_vars, j, STRING_ELT(current_vars, i));
      SET_VECTOR_ELT(new_current_data, j, VECTOR_ELT(current_data, i));
      j++;
    }
    Rf_namesgets(new_current_data, new_current_vars);
    Rf_defineVar(dplyr::symbols::current_data,
new_current_data, env_private);

    SEXP sym_name = PROTECT(rlang::str_as_symbol(name));
    SEXP chops = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::chops));
    SEXP env_mask_bindings = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::env_mask_bindings));

    rlang::env_unbind(env_mask_bindings, sym_name);
    rlang::env_unbind(chops, sym_name);

    UNPROTECT(5);
  }

  UNPROTECT(2);
  return R_NilValue;
}
dplyr/src/funs.cpp0000644000176200001440000000332714366556340013673 0ustar liggesusers#include "dplyr.h"

// Cumulative "all" over a logical vector with three-valued logic:
// TRUE while every value so far is TRUE, then NA while values are NA or
// TRUE, then FALSE for everything at and after the first FALSE.
SEXP dplyr_cumall(SEXP x) {
  R_xlen_t n = XLENGTH(x);
  SEXP out = PROTECT(Rf_allocVector(LGLSXP, n));
  int* p_x = LOGICAL(x);
  int* p_out = LOGICAL(out);

  // set out[i] to TRUE as long as x[i] is TRUE
  R_xlen_t i = 0 ;
  for (; i < n; i++, ++p_x, ++p_out) {
    if (*p_x == TRUE) {
      *p_out = TRUE;
    } else {
      break;
    }
  }

  if (i != n) {
    // set to NA as long as x[i] is NA or TRUE
    for (; i < n; i++, ++p_x, ++p_out) {
      if (*p_x == FALSE) {
        break;
      }
      *p_out = NA_LOGICAL;
    }

    // set remaining to FALSE
    if (i != n) {
      for (; i < n; i++, ++p_x, ++p_out) {
        *p_out = FALSE;
      }
    }
  }
  UNPROTECT(1);
  return out;
}

// Cumulative "any" over a logical vector, dual of dplyr_cumall():
// FALSE while every value so far is FALSE, then NA while values are NA or
// FALSE, then TRUE for everything at and after the first TRUE.
SEXP dplyr_cumany(SEXP x) {
  R_xlen_t n = XLENGTH(x);
  SEXP out = PROTECT(Rf_allocVector(LGLSXP, n));
  int* p_x = LOGICAL(x);
  int* p_out = LOGICAL(out);

  // nothing to do as long as x[i] is FALSE
  R_xlen_t i = 0 ;
  for (; i < n; i++, ++p_x, ++p_out) {
    if (*p_x == FALSE) {
      *p_out = FALSE;
    } else {
      break;
    }
  }

  if (i < n) {
    // set to NA as long as x[i] is NA or FALSE
    for (; i < n; i++, ++p_x, ++p_out) {
      if (*p_x == TRUE) {
        break;
      }
      *p_out = NA_LOGICAL;
    }

    if (i < n) {
      // then if we are here, the rest is TRUE
      for (; i < n; i++, ++p_out) {
        *p_out = TRUE;
      }
    }
  }
  UNPROTECT(1);
  return out;
}

// Cumulative mean of a double vector: out[i] = mean(x[0..i]).
// NA/NaN inputs propagate through the running sum to all later entries.
SEXP dplyr_cummean(SEXP x) {
  R_xlen_t n = XLENGTH(x);
  SEXP out = PROTECT(Rf_allocVector(REALSXP, n));
  double* p_out = REAL(out);
  double* p_x = REAL(x);

  double sum = 0.0;
  for (R_xlen_t i = 0; i < n; i++, ++p_x, ++p_out) {
    sum += *p_x;
    *p_out = sum / (i + 1.0);
  }

  UNPROTECT(1);
  return out;
}
dplyr/src/reconstruct.cpp0000644000176200001440000001125414525503021015252 0ustar liggesusers#include "dplyr.h"

// Essentially, a C implementation of:
//
// ```
// attributes <- attributes(template)
// attributes$names <- names(data)
// attributes$row.names <- .row_names_info(data, type = 0L)
// attributes(data) <- attributes
// ```
//
// The problem with that is that:
// - `attributes()` ends up calling `Rf_getAttrib()`, which tries to check
//   for internal `row.names` in `template` so they aren't leaked to the user.
//   Unfortunately this materializes lazy ALTREP `row.names`, like those used
//   by duckplyr.
// - `attributes<-()` ends up calling `Rf_setAttrib()`, which tries to check
//   if it can make efficient internal `row.names`. Again, this materializes
//   lazy ALTREP `row.names`, like those used by duckplyr.
//
// So we bypass that here by carefully manipulating the attribute pairlists.
//
// We expect that at this point, both `data` and `template_` are S3 data
// frames, both of which have `names` and `row.names` attributes. If this isn't
// true, we error.
// - For `data`, we enforce this in `dplyr_reconstruct()`'s generic by calling
//   `dplyr_new_data_frame()` (ideally no intermediate method invalidates this).
// - For `template_`, we assume this since we got here through the S3 method
//   `dplyr_reconstruct.data.frame()`, which dispatched off `template_`. A
//   well-formed S3 data frame must have `names` and `row.names` attributes.
//
// https://github.com/tidyverse/dplyr/pull/6947
// https://github.com/tidyverse/dplyr/issues/6525#issuecomment-1303619152
// https://github.com/wch/r-source/blob/69b94f0c8ce9b2497f6d7a81922575f6c585b713/src/main/attrib.c#L176-L177
// https://github.com/wch/r-source/blob/69b94f0c8ce9b2497f6d7a81922575f6c585b713/src/main/attrib.c#L57
SEXP ffi_dplyr_reconstruct(SEXP data, SEXP template_) {
  if (TYPEOF(data) != VECSXP) {
    Rf_errorcall(R_NilValue, "Internal error: `data` must be a list.");
  }
  if (TYPEOF(template_) != VECSXP) {
    Rf_errorcall(R_NilValue, "Internal error: `template` must be a list.");
  }
  if (!OBJECT(data)) {
    Rf_errorcall(R_NilValue, "Internal error: `data` must be an object.");
  }
  if (!OBJECT(template_)) {
    Rf_errorcall(R_NilValue, "Internal error: `template` must be an object.");
  }

  bool seen_names = false;
  bool seen_row_names = false;

  // Pull the `names` and `row.names` off `data`.
  // These are the only 2 attributes from `data` that persist.
  // Walking ATTRIB() directly (rather than Rf_getAttrib()) is the whole
  // point: it avoids materializing lazy ALTREP `row.names`.
  SEXP names = R_NilValue;
  SEXP row_names = R_NilValue;

  for (SEXP node = ATTRIB(data); node != R_NilValue; node = CDR(node)) {
    SEXP tag = TAG(node);

    if (tag == R_NamesSymbol) {
      names = CAR(node);
      MARK_NOT_MUTABLE(names);
      seen_names = true;
    }
    if (tag == R_RowNamesSymbol) {
      row_names = CAR(node);
      MARK_NOT_MUTABLE(row_names);
      seen_row_names = true;
    }
  }

  if (!seen_names) {
    Rf_errorcall(R_NilValue, "Internal error: `data` must have a `names` attribute.");
  }
  if (!seen_row_names) {
    Rf_errorcall(R_NilValue, "Internal error: `data` must have a `row.names` attribute.");
  }

  seen_names = false;
  seen_row_names = false;

  // Now replace the `names` and `row.names` attributes in the `template_`
  // attributes with the ones from `data`. This attribute set becomes the final
  // one we set on `data`.
  SEXP attributes = ATTRIB(template_);
  // Shallow-duplicate the pairlist so `template_`'s own attributes are
  // never mutated.
  attributes = PROTECT(Rf_shallow_duplicate(attributes));

  for (SEXP node = attributes; node != R_NilValue; node = CDR(node)) {
    SEXP tag = TAG(node);

    if (tag == R_NamesSymbol) {
      SETCAR(node, names);
      seen_names = true;
    }
    if (tag == R_RowNamesSymbol) {
      SETCAR(node, row_names);
      seen_row_names = true;
    }
  }

  if (!seen_names) {
    Rf_errorcall(R_NilValue, "Internal error: `template` must have a `names` attribute.");
  }
  if (!seen_row_names) {
    Rf_errorcall(R_NilValue, "Internal error: `template` must have a `row.names` attribute.");
  }

  // Make an ALTREP wrapper if possible, since the underlying data doesn't change.
  // Won't actually make an ALTREP wrapper unless there are >64 columns
  // (internally controlled by R).
#if R_VERSION >= R_Version(3, 6, 0)
  data = PROTECT(R_shallow_duplicate_attr(data));
#else
  data = PROTECT(Rf_shallow_duplicate(data));
#endif

  SET_ATTRIB(data, attributes);

  UNPROTECT(2);
  return data;
}

// Very unsafe wrappers needed for testing.
// Bypass `Rf_getAttrib()` and `Rf_setAttrib()` calls to avoid forcing ALTREP
// `row.names`.
SEXP ffi_test_dplyr_attributes(SEXP x) {
  return ATTRIB(x);
}

// Test-only: replace `x`'s attribute pairlist wholesale on a shallow copy.
SEXP ffi_test_dplyr_set_attributes(SEXP x, SEXP attributes) {
  if (TYPEOF(attributes) != LISTSXP) {
    Rf_errorcall(R_NilValue, "`attributes` must be a pairlist.");
  }
  x = PROTECT(Rf_shallow_duplicate(x));
  SET_ATTRIB(x, attributes);
  UNPROTECT(1);
  return x;
}
dplyr/src/chop.cpp0000644000176200001440000001072514406402754013643 0ustar liggesusers#include "dplyr.h"

// Create a hashed R environment of the given size with `parent`, by
// evaluating a `new.env(TRUE, parent, size)`-style call in base.
SEXP new_environment(int size, SEXP parent)  {
  SEXP call = PROTECT(Rf_lang4(dplyr::symbols::new_env, Rf_ScalarLogical(TRUE), parent, Rf_ScalarInteger(size)));
  SEXP res = Rf_eval(call, R_BaseEnv);
  UNPROTECT(1);
  return res;
}

// Populate `chops_env` with one lazy promise per column of `data`.
// Each promise, when forced, yields the list of per-group chunks of that
// column (via `vec_chop(column, rows)`); in rowwise mode, list columns are
// used as their own chunks instead of being chopped.
void dplyr_lazy_vec_chop_grouped(SEXP chops_env, SEXP rows, SEXP data, bool rowwise) {
  SEXP names = PROTECT(Rf_getAttrib(data, R_NamesSymbol));
  R_xlen_t n = XLENGTH(data);
  const SEXP* p_data = VECTOR_PTR_RO(data);
  const SEXP* p_names = STRING_PTR_RO(names);
  for (R_xlen_t i = 0; i < n; i++) {
    // Hand-built promise with an empty environment; the code slot is set
    // below and the value left unbound until first use.
    SEXP prom = PROTECT(Rf_allocSExp(PROMSXP));
    SET_PRENV(prom, R_EmptyEnv);
    SEXP column = p_data[i];
    if (rowwise && vctrs::obj_is_list(column)) {
      if (Rf_length(column) == 0) {
        // Empty list column: substitute a length-1 list holding its ptype
        // so that there is still one chunk per (zero) rows.
        SEXP ptype = PROTECT(Rf_getAttrib(column, Rf_install("ptype")));
        column = PROTECT(Rf_allocVector(VECSXP, 1));
        if (ptype != R_NilValue) {
          SET_VECTOR_ELT(column, 0, ptype);
        } else {
          // i.e.
`vec_ptype_finalise(unspecified())` (#6369)
          SET_VECTOR_ELT(column, 0, Rf_allocVector(LGLSXP, 1));
        }
        SET_PRCODE(prom, column);
        UNPROTECT(2);
      } else {
        // Rowwise list column: each element is already that row's chunk.
        SET_PRCODE(prom, column);
      }
    } else {
      // Lazily chop the column by group: forces to vec_chop(column, rows).
      SET_PRCODE(prom, Rf_lang3(dplyr::functions::vec_chop, column, rows));
    }
    SET_PRVALUE(prom, R_UnboundValue);
    Rf_defineVar(rlang::str_as_symbol(p_names[i]), prom, chops_env);
    UNPROTECT(1);
  }
  UNPROTECT(1);
}

// Ungrouped variant: each column's promise forces to `list(column)`,
// i.e. a single chunk containing the whole column.
void dplyr_lazy_vec_chop_ungrouped(SEXP chops_env, SEXP data) {
  SEXP names = PROTECT(Rf_getAttrib(data, R_NamesSymbol));
  R_xlen_t n = XLENGTH(data);
  const SEXP* p_data = VECTOR_PTR_RO(data);
  const SEXP* p_names = STRING_PTR_RO(names);
  for (R_xlen_t i = 0; i < n; i++) {
    SEXP prom = PROTECT(Rf_allocSExp(PROMSXP));
    SET_PRENV(prom, R_EmptyEnv);
    SET_PRCODE(prom, Rf_lang2(dplyr::functions::list, p_data[i]));
    SET_PRVALUE(prom, R_UnboundValue);
    SEXP symb = rlang::str_as_symbol(p_names[i]);
    Rf_defineVar(symb, prom, chops_env);
    UNPROTECT(1);
  }
  UNPROTECT(1);
}

// Entry point: build the environment of lazy column chops for a grouped,
// rowwise or ungrouped data frame.
// NOTE(review): template arguments were stripped from this rendering --
// `static_cast(...)` below is presumably `static_cast<bool>(...)`; confirm
// against the upstream dplyr sources.
SEXP dplyr_lazy_vec_chop(SEXP data, SEXP rows, SEXP env_current_group_info, SEXP ffi_grouped, SEXP ffi_rowwise) {
  bool grouped = static_cast(LOGICAL_ELT(ffi_grouped, 0));
  bool rowwise = static_cast(LOGICAL_ELT(ffi_rowwise, 0));

  // An environment to hold the chops of the columns.
  // Parent environment contains information about current group id
  // and current group size, for use in mask binding evaluation.
SEXP env_chops = PROTECT(new_environment(XLENGTH(data), env_current_group_info));

  if (grouped) {
    dplyr_lazy_vec_chop_grouped(env_chops, rows, data, false);
  } else if (rowwise) {
    dplyr_lazy_vec_chop_grouped(env_chops, rows, data, true);
  } else {
    dplyr_lazy_vec_chop_ungrouped(env_chops, data);
  }

  UNPROTECT(1);
  return env_chops;
}

// Install an active binding for `name` in `env_mask_bindings`.
// The binding function, closed over `env_chops`, evaluates
// `.subset2(name, current_group_id)` -- i.e. "this column's chunk for the
// current group" -- each time the masked name is read.
void add_mask_binding(SEXP name, SEXP env_mask_bindings, SEXP env_chops) {
  SEXP body = PROTECT(Rf_lang3(dplyr::functions::dot_subset2, name, dplyr::symbols::current_group_id));
  SEXP fun = PROTECT(Rf_lang3(dplyr::functions::function, R_NilValue, body));
  SEXP binding = PROTECT(Rf_eval(fun, env_chops));
  R_MakeActiveBinding(name, binding, env_mask_bindings);
  UNPROTECT(3);
}

// Build the mask-bindings environment: one active binding per column of
// `data`, all backed by the lazy chops in `env_chops`.
SEXP dplyr_make_mask_bindings(SEXP env_chops, SEXP data) {
  R_xlen_t n_columns = XLENGTH(data);

  SEXP names = PROTECT(Rf_getAttrib(data, R_NamesSymbol));
  const SEXP* p_names = STRING_PTR_RO(names);

  // Create environment with one active binding per column.
  // Leave some extra room for new columns added by `dplyr_mask_binding_add()`.
  R_xlen_t size = n_columns + 20;
  SEXP env_mask_bindings = PROTECT(new_environment(size, R_EmptyEnv));

  for (R_xlen_t i = 0; i < n_columns; i++) {
    SEXP name = PROTECT(rlang::str_as_symbol(p_names[i]));
    add_mask_binding(name, env_mask_bindings, env_chops);
    UNPROTECT(1);
  }

  UNPROTECT(2);
  return env_mask_bindings;
}

// For each name in `names`, report (as a named logical vector) whether the
// corresponding binding in `env` has been resolved, i.e. is either a
// non-promise value or a promise that has already been forced.
SEXP env_resolved(SEXP env, SEXP names) {
  R_xlen_t n = XLENGTH(names);
  SEXP res = PROTECT(Rf_allocVector(LGLSXP, n));
  int* p_res = LOGICAL(res);
  const SEXP* p_names = STRING_PTR_RO(names);
  for(R_xlen_t i = 0; i < n; i++) {
    SEXP name = PROTECT(rlang::str_as_symbol(p_names[i]));
    SEXP prom = PROTECT(Rf_findVarInFrame(env, name));
    SEXP val = TYPEOF(prom) == PROMSXP ?
PRVALUE(prom) : prom;
    // resolved unless the promise value is still R_UnboundValue
    p_res[i] = val != R_UnboundValue;
    UNPROTECT(2);
  }
  Rf_namesgets(res, names);
  UNPROTECT(1);
  return res;
}
dplyr/src/dplyr.h0000644000176200001440000001542514525503021013502 0ustar liggesusers#ifndef DPLYR_DPLYR_H
#define DPLYR_DPLYR_H

#define R_NO_REMAP
/* NOTE(review): the header names were stripped from this rendering (angle
   brackets lost); presumably <R.h>, <Rinternals.h>, <Rversion.h>, <R_ext/
   Altrep.h> or similar -- recover from the upstream dplyr sources. */
#include 
#include 
#include 
#include 

/* Encoding bit masks matching R's internal CHARSXP LEVELS() flags. */
#define UTF8_MASK (1<<3)
#define ASCII_MASK (1<<6)

#define IS_ASCII(x) (LEVELS(x) & ASCII_MASK)
#define IS_UTF8(x) (LEVELS(x) & UTF8_MASK)

/* Read-only accessor shims for R older than 3.5.0, where the *_RO variants
   do not exist yet. */
#if (R_VERSION < R_Version(3, 5, 0))
# define LOGICAL_RO(x) ((const int*) LOGICAL(x))
# define INTEGER_RO(x) ((const int*) INTEGER(x))
# define REAL_RO(x) ((const double*) REAL(x))
# define COMPLEX_RO(x) ((const Rcomplex*) COMPLEX(x))
# define STRING_PTR_RO(x) ((const SEXP*) STRING_PTR(x))
# define RAW_RO(x) ((const Rbyte*) RAW(x))
# define DATAPTR_RO(x) ((const void*) STRING_PTR(x))
#endif

#define VECTOR_PTR_RO(x) ((const SEXP*) DATAPTR_RO(x))

namespace dplyr {

/* Cached namespace environments, installed at load time. */
struct envs {
  static SEXP ns_dplyr;
  static SEXP ns_vctrs;
  static SEXP ns_rlang;
};

/* Cached symbols (installed once) used throughout the package. */
struct symbols {
  static SEXP groups;
  static SEXP levels;
  static SEXP ptype;
  static SEXP current_group_id;
  static SEXP current_group_size;
  static SEXP current_expression;
  static SEXP rows;
  static SEXP caller;
  static SEXP current_data;
  static SEXP dot_drop;
  static SEXP dplyr_internal_error;
  static SEXP dplyr_internal_signal;
  static SEXP chops;
  static SEXP obj_is_list;
  static SEXP new_env;
  static SEXP dot_data;
  static SEXP used;
  static SEXP across;
  static SEXP env_current_group_info;
  static SEXP env_mask_bindings;
};

/* Cached shared R vectors (class vectors, empty vectors, name sets). */
struct vectors {
  static SEXP classes_vctrs_list_of;
  static SEXP empty_int_vector;
  static SEXP names_expanded;
  static SEXP names_summarise_recycle_chunks;
};

/* Cached R function objects called from C. */
struct functions {
  static SEXP vec_chop;
  static SEXP dot_subset2;
  static SEXP list;
  static SEXP function;
};

} // namespace dplyr

/* Thin C declarations for rlang C callables registered elsewhere. */
namespace rlang {
SEXP eval_tidy(SEXP expr, SEXP data, SEXP env);
SEXP as_data_pronoun(SEXP x);
SEXP new_data_mask(SEXP bottom, SEXP top);
SEXP str_as_symbol(SEXP);
SEXP quo_get_expr(SEXP quo);
void env_unbind(SEXP, SEXP); } namespace vctrs { bool obj_is_vector(SEXP x) ; R_len_t short_vec_size(SEXP x) ; SEXP short_vec_recycle(SEXP x, R_len_t n); inline bool obj_is_list(SEXP x) { SEXP call = PROTECT(Rf_lang2(dplyr::symbols::obj_is_list, x)); SEXP res = Rf_eval(call, dplyr::envs::ns_vctrs); UNPROTECT(1); return LOGICAL(res)[0]; } } SEXP ffi_dplyr_reconstruct(SEXP data, SEXP template_); SEXP ffi_test_dplyr_attributes(SEXP x); SEXP ffi_test_dplyr_set_attributes(SEXP x, SEXP attributes); SEXP dplyr_expand_groups(SEXP old_groups, SEXP positions, SEXP s_nr); SEXP dplyr_cumall(SEXP x); SEXP dplyr_cumany(SEXP x); SEXP dplyr_cummean(SEXP x); SEXP dplyr_validate_grouped_df(SEXP df, SEXP s_check_bounds); SEXP dplyr_validate_rowwise_df(SEXP df); SEXP dplyr_mask_eval_all(SEXP quo, SEXP env_private); SEXP dplyr_mask_eval_all_summarise(SEXP quo, SEXP env_private); SEXP dplyr_mask_eval_all_mutate(SEXP quo, SEXP env_private); SEXP dplyr_mask_eval_all_filter(SEXP quos, SEXP env_private, SEXP s_n, SEXP env_filter); SEXP dplyr_summarise_recycle_chunks_in_place(SEXP list_of_chunks, SEXP list_of_result); SEXP dplyr_group_indices(SEXP data, SEXP rows); SEXP dplyr_group_keys(SEXP group_data); SEXP dplyr_mask_binding_remove(SEXP env_private, SEXP s_name); SEXP dplyr_mask_binding_add(SEXP env_private, SEXP s_name, SEXP ptype, SEXP chunks); SEXP dplyr_lazy_vec_chop(SEXP data, SEXP rows, SEXP env_current_group_info, SEXP ffi_grouped, SEXP ffi_rowwise); SEXP dplyr_make_mask_bindings(SEXP chops, SEXP data); SEXP env_resolved(SEXP env, SEXP names); void add_mask_binding(SEXP name, SEXP env_mask_bindings, SEXP env_chops); SEXP dplyr_extract_chunks(SEXP df_list, SEXP df_ptype); #define DPLYR_MASK_INIT() \ SEXP rows = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::rows)); \ const SEXP* v_rows = VECTOR_PTR_RO(rows); \ R_xlen_t ngroups = XLENGTH(rows); \ SEXP caller = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::caller)); \ SEXP env_mask_bindings = 
PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::env_mask_bindings)); \ SEXP pronoun = PROTECT(rlang::as_data_pronoun(env_mask_bindings)); \ SEXP env_current_group_info = PROTECT(Rf_findVarInFrame(env_private, dplyr::symbols::env_current_group_info)); \ SEXP current_group_id = PROTECT(Rf_findVarInFrame(env_current_group_info, dplyr::symbols::current_group_id)); \ int* p_current_group_id = INTEGER(current_group_id); \ *p_current_group_id = 0; \ SEXP current_group_size = PROTECT(Rf_findVarInFrame(env_current_group_info, dplyr::symbols::current_group_size)); \ int* p_current_group_size = INTEGER(current_group_size); \ *p_current_group_size = 0 #define DPLYR_MASK_FINALISE() \ UNPROTECT(7); \ *p_current_group_id = 0; \ *p_current_group_size = 0 // At each iteration, we create a fresh data mask so that lexical side effects, // such as using `<-` in a `mutate()`, don't persist between groups #define DPLYR_MASK_ITERATION_INIT() \ SEXP mask = PROTECT(rlang::new_data_mask(env_mask_bindings, R_NilValue)); \ Rf_defineVar(dplyr::symbols::dot_data, pronoun, mask) #define DPLYR_MASK_ITERATION_FINALISE() \ UNPROTECT(1) #define DPLYR_MASK_SET_GROUP(INDEX) \ *p_current_group_id = INDEX + 1; \ *p_current_group_size = Rf_xlength(v_rows[INDEX]) #define DPLYR_MASK_EVAL(quo) \ rlang::eval_tidy(quo, mask, caller) #define DPLYR_ERROR_INIT(n) \ SEXP error_data = PROTECT(Rf_allocVector(VECSXP, n)); \ SEXP error_names = PROTECT(Rf_allocVector(STRSXP, n)); \ Rf_setAttrib(error_data, R_NamesSymbol, error_names); #define DPLYR_ERROR_SET(i, name, value) \ SET_VECTOR_ELT(error_data, i, value); \ SET_STRING_ELT(error_names, i, Rf_mkChar(name)); #define DPLYR_ERROR_THROW(klass) \ SEXP error_class = PROTECT(Rf_mkString(klass)); \ SEXP error_call = PROTECT(Rf_lang3(dplyr::symbols::dplyr_internal_error, error_class, error_data)); \ Rf_eval(error_call, dplyr::envs::ns_dplyr); \ UNPROTECT(4) ; // for rchk #endif dplyr/src/filter.cpp0000644000176200001440000001360414406402754014176 0ustar 
liggesusers#include "dplyr.h" namespace dplyr { static inline void stop_filter_incompatible_size(R_xlen_t i, SEXP quos, R_xlen_t nres, R_xlen_t n) { DPLYR_ERROR_INIT(3); DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); DPLYR_ERROR_SET(1, "size", Rf_ScalarInteger(nres)); DPLYR_ERROR_SET(2, "expected_size", Rf_ScalarInteger(n)); DPLYR_ERROR_THROW("dplyr:::filter_incompatible_size"); } static inline void stop_filter_incompatible_type(R_xlen_t i, SEXP quos, SEXP column_name, SEXP result){ DPLYR_ERROR_INIT(3); DPLYR_ERROR_SET(0, "index", Rf_ScalarInteger(i + 1)); DPLYR_ERROR_SET(1, "column_name", column_name); DPLYR_ERROR_SET(2, "result", result); DPLYR_ERROR_THROW("dplyr:::filter_incompatible_type"); } static inline void signal_filter(const char* cls) { SEXP ffi_cls = PROTECT(Rf_mkString(cls)); SEXP ffi_call = PROTECT(Rf_lang2(dplyr::symbols::dplyr_internal_signal, ffi_cls)); Rf_eval(ffi_call, dplyr::envs::ns_dplyr); UNPROTECT(2); } static void signal_filter_one_column_matrix() { signal_filter("dplyr:::signal_filter_one_column_matrix"); } static void signal_filter_across() { signal_filter("dplyr:::signal_filter_across"); } static void signal_filter_data_frame() { signal_filter("dplyr:::signal_filter_data_frame"); } } // Reduces using logical `&` static inline void filter_lgl_reduce(SEXP x, R_xlen_t n, int* p_reduced) { const R_xlen_t n_x = Rf_xlength(x); const int* p_x = LOGICAL_RO(x); if (n_x == 1) { if (p_x[0] != TRUE) { for (R_xlen_t i = 0; i < n; ++i) { p_reduced[i] = FALSE; } } } else { for (R_xlen_t i = 0; i < n; ++i) { p_reduced[i] = (p_reduced[i] == TRUE) && (p_x[i] == TRUE); } } } static inline bool filter_is_valid_lgl(SEXP x, bool first) { if (TYPEOF(x) != LGLSXP) { return false; } SEXP dim = PROTECT(Rf_getAttrib(x, R_DimSymbol)); if (dim == R_NilValue) { // Bare logical vector UNPROTECT(1); return true; } const R_xlen_t dimensionality = Rf_xlength(dim); if (dimensionality == 1) { // 1 dimension array. 
We allow these because many things in R produce them. UNPROTECT(1); return true; } const int* p_dim = INTEGER(dim); if (dimensionality == 2 && p_dim[1] == 1) { // 1 column matrix. We allow these with a warning that this will be // deprecated in the future. if (first) { dplyr::signal_filter_one_column_matrix(); } UNPROTECT(1); return true; } UNPROTECT(1); return false; } static inline void filter_df_reduce(SEXP x, R_xlen_t n, bool first, R_xlen_t i_quo, SEXP quos, int* p_reduced) { if (first) { SEXP expr = rlang::quo_get_expr(VECTOR_ELT(quos, i_quo)); const bool across = TYPEOF(expr) == LANGSXP && CAR(expr) == dplyr::symbols::across; if (across) { dplyr::signal_filter_across(); } else { dplyr::signal_filter_data_frame(); } } const SEXP* p_x = VECTOR_PTR_RO(x); const R_xlen_t n_col = Rf_xlength(x); for (R_xlen_t i = 0; i < n_col; ++i) { SEXP col = p_x[i]; if (!filter_is_valid_lgl(col, first)) { SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); SEXP name = PROTECT(Rf_ScalarString(STRING_ELT(names, i))); dplyr::stop_filter_incompatible_type(i_quo, quos, name, col); UNPROTECT(2); } filter_lgl_reduce(col, n, p_reduced); } } static SEXP eval_filter_one(SEXP quos, SEXP mask, SEXP caller, R_xlen_t n, SEXP env_filter, bool first) { // Reduce to a single logical vector of size `n` SEXP reduced = PROTECT(Rf_allocVector(LGLSXP, n)); int* p_reduced = LOGICAL(reduced); // Init with `TRUE` for (R_xlen_t i = 0; i < n; ++i) { p_reduced[i] = TRUE; } const R_xlen_t n_quos = Rf_xlength(quos); SEXP const* p_quos = VECTOR_PTR_RO(quos); // Reduce loop for (R_xlen_t i = 0; i < n_quos; ++i) { SEXP current_expression = PROTECT(Rf_ScalarInteger(i + 1)); Rf_defineVar(dplyr::symbols::current_expression, current_expression, env_filter); SEXP res = PROTECT(rlang::eval_tidy(p_quos[i], mask, caller)); const R_xlen_t res_size = vctrs::short_vec_size(res); if (res_size != n && res_size != 1) { dplyr::stop_filter_incompatible_size(i, quos, res_size, n); } if (filter_is_valid_lgl(res, first)) { 
filter_lgl_reduce(res, n, p_reduced); } else if (Rf_inherits(res, "data.frame")) { filter_df_reduce(res, n, first, i, quos, p_reduced); } else { dplyr::stop_filter_incompatible_type(i, quos, R_NilValue, res); } UNPROTECT(2); } UNPROTECT(1); return reduced; } SEXP dplyr_mask_eval_all_filter(SEXP quos, SEXP env_private, SEXP s_n, SEXP env_filter) { DPLYR_MASK_INIT(); const SEXP* p_rows = VECTOR_PTR_RO(rows); const R_xlen_t n = Rf_asInteger(s_n); SEXP keep = PROTECT(Rf_allocVector(LGLSXP, n)); int* p_keep = LOGICAL(keep); for (R_xlen_t i = 0; i < ngroups; ++i) { DPLYR_MASK_ITERATION_INIT(); DPLYR_MASK_SET_GROUP(i); const bool first = i == 0; SEXP rows_i = p_rows[i]; R_xlen_t n_i = Rf_xlength(rows_i); SEXP result_i = PROTECT(eval_filter_one( quos, mask, caller, n_i, env_filter, first )); const int* p_rows_i = INTEGER(rows_i); const int* p_result_i = LOGICAL(result_i); for (R_xlen_t j = 0; j < n_i; ++j) { p_keep[p_rows_i[j] - 1] = p_result_i[j]; } UNPROTECT(1); DPLYR_MASK_ITERATION_FINALISE(); } UNPROTECT(1); DPLYR_MASK_FINALISE(); return keep; } dplyr/vignettes/0000755000176200001440000000000014525507110013414 5ustar liggesusersdplyr/vignettes/rowwise.Rmd0000644000176200001440000003357114420040360015561 0ustar liggesusers--- title: "Row-wise operations" description: > In R, it's usually easier to do something for each column than for each row. In this vignette you will learn how to use the `rowwise()` function to perform operations by row. Along the way, you'll learn about list-columns, and see how you might perform simulations and modelling within dplyr verbs. 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Row-wise operations} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` dplyr, and R in general, are particularly well suited to performing operations over columns, and performing operations over rows is much harder. In this vignette, you'll learn dplyr's approach centred around the row-wise data frame created by `rowwise()`. There are three common use cases that we discuss in this vignette: * Row-wise aggregates (e.g. compute the mean of x, y, z). * Calling a function multiple times with varying arguments. * Working with list-columns. These types of problems are often easily solved with a for loop, but it's nice to have a solution that fits naturally into a pipeline. > Of course, someone has to write loops. It doesn't have to be you. > --- Jenny Bryan ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ```{r include = FALSE} nest_by <- function(df, ...) { df %>% group_by(...) %>% summarise(data = list(pick(everything()))) %>% rowwise(...) } # mtcars %>% nest_by(cyl) ``` ## Creating Row-wise operations require a special type of grouping where each group consists of a single row. You create this with `rowwise()`: ```{r} df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() ``` Like `group_by()`, `rowwise()` doesn't really do anything itself; it just changes how the other verbs work. For example, compare the results of `mutate()` in the following code: ```{r} df %>% mutate(m = mean(c(x, y, z))) df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) ``` If you use `mutate()` with a regular data frame, it computes the mean of `x`, `y`, and `z` across all rows. If you apply it to a row-wise data frame, it computes the mean for each row. You can optionally supply "identifier" variables in your call to `rowwise()`. 
These variables are preserved when you call `summarise()`, so they behave somewhat similarly to the grouping variables passed to `group_by()`: ```{r} df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) ``` `rowwise()` is just a special form of grouping, so if you want to remove it from a data frame, just call `ungroup()`. ## Per row summary statistics `dplyr::summarise()` makes it really easy to summarise values across rows within one column. When combined with `rowwise()` it also makes it easy to summarise values across columns within one row. To see how, we'll start by making a little dataset: ```{r} df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df ``` Let's say we want compute the sum of `w`, `x`, `y`, and `z` for each row. We start by making a row-wise data frame: ```{r} rf <- df %>% rowwise(id) ``` We can then use `mutate()` to add a new column to each row, or `summarise()` to return just that one summary: ```{r} rf %>% mutate(total = sum(c(w, x, y, z))) rf %>% summarise(total = sum(c(w, x, y, z))) ``` Of course, if you have a lot of variables, it's going to be tedious to type in every variable name. Instead, you can use `c_across()` which uses tidy selection syntax so you can to succinctly select many variables: ```{r} rf %>% mutate(total = sum(c_across(w:z))) rf %>% mutate(total = sum(c_across(where(is.numeric)))) ``` You could combine this with column-wise operations (see `vignette("colwise")` for more details) to compute the proportion of the total for each column: ```{r} rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . / total)) ``` ### Row-wise summary functions The `rowwise()` approach will work for any summary function. But if you need greater speed, it's worth looking for a built-in row-wise variant of your summary function. 
These are more efficient because they operate on the data frame as a whole;
Since you're here, you might already be guessing at the answer: this is just another application of the row-wise pattern. ```{r} df %>% rowwise() %>% mutate(l = length(x)) ``` ### Subsetting Before we continue on, I wanted to briefly mention the magic that makes this work. This isn't something you'll generally need to think about (it'll just work), but it's useful to know about when something goes wrong. There's an important difference between a grouped data frame where each group happens to have one row, and a row-wise data frame where every group always has one row. Take these two data frames: ```{r} df <- tibble(g = 1:2, y = list(1:3, "a")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) ``` If we compute some properties of `y`, you'll notice the results look different: ```{r} gf %>% mutate(type = typeof(y), length = length(y)) rf %>% mutate(type = typeof(y), length = length(y)) ``` They key difference is that when `mutate()` slices up the columns to pass to `length(y)` the grouped mutate uses `[` and the row-wise mutate uses `[[`. The following code gives a flavour of the differences if you used a for loop: ```{r} # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 ``` Note that this magic only applies when you're referring to existing columns, not when you're creating new rows. This is potentially confusing, but we're fairly confident it's the least worst solution, particularly given the hint in the error message. ```{r, error = TRUE} gf %>% mutate(y2 = y) rf %>% mutate(y2 = y) rf %>% mutate(y2 = list(y)) ``` ### Modelling `rowwise()` data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. We'll start by creating a nested data frame: ```{r} by_cyl <- mtcars %>% nest_by(cyl) by_cyl ``` This is a little different to the usual `group_by()` output: we have visibly changed the structure of the data. 
Now we have three rows (one for each group), and we have a list-col, `data`, that stores the data for that group. Also note that the output is `rowwise()`; this is important because it's going to make working with that list of data frames much easier. Once we have one data frame per row, it's straightforward to make one model per row: ```{r} mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods ``` And supplement that with one set of predictions per row: ```{r} mods <- mods %>% mutate(pred = list(predict(mod, data))) mods ``` You could then summarise the model in a variety of ways: ```{r} mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) mods %>% summarise(rsq = summary(mod)$r.squared) mods %>% summarise(broom::glance(mod)) ``` Or easily access the parameters of each model: ```{r} mods %>% reframe(broom::tidy(mod)) ``` ## Repeated function calls `rowwise()` doesn't just work with functions that return a length-1 vector (aka summary functions); it can work with any function if the result is a list. This means that `rowwise()` and `mutate()` provide an elegant way to call a function many times with varying arguments, storing the outputs alongside the inputs. ### Simulations I think this is a particularly elegant way to perform simulations, because it lets you store simulated values along with the parameters that generated them. For example, imagine you have the following data frame that describes the properties of 3 samples from the uniform distribution: ```{r} df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) ``` You can supply these parameters to `runif()` by using `rowwise()` and `mutate()`: ```{r} df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) ``` Note the use of `list()` here - `runif()` returns multiple values and a `mutate()` expression has to return something of length 1. `list()` means that we'll get a list column where each row is a list containing multiple values. 
If you forget to use `list()`, dplyr will give you a hint: ```{r, error = TRUE} df %>% rowwise() %>% mutate(data = runif(n, min, max)) ``` ### Multiple combinations What if you want to call a function for every combination of inputs? You can use `expand.grid()` (or `tidyr::expand_grid()`) to generate the data frame and then repeat the same pattern as above: ```{r} df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) ``` ### Varying functions In more complicated problems, you might also want to vary the function being called. This tends to be a bit more of an awkward fit with this approach because the columns in the input tibble will be less regular. But it's still possible, and it's a natural place to use `do.call()`: ```{r} df <- tribble( ~rng, ~params, "runif", list(n = 10), "rnorm", list(n = 20), "rpois", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) ``` ```{r, include = FALSE, eval = FALSE} df <- rowwise(tribble( ~rng, ~params, "runif", list(min = -1, max = 1), "rnorm", list(), "rpois", list(lambda = 5), )) # Has to happen in separate function to avoid eager unquoting f <- function(rng, params) purrr::exec(rng, n = 10, !!!params) df %>% mutate(data = list(f(rng, params))) ``` ## Previously ### `rowwise()` `rowwise()` was also questioning for quite some time, partly because I didn't appreciate how many people needed the native ability to compute summaries across multiple variables for each row. As an alternative, we recommended performing row-wise operations with the purrr `map()` functions. However, this was challenging because you needed to pick a map function based on the number of arguments that were varying and the type of result, which required quite some knowledge of purrr functions. 
I was also resistant to `rowwise()` because I felt like automatically switching between `[` to `[[` was too magical in the same way that automatically `list()`-ing results made `do()` too magical. I've now persuaded myself that the row-wise magic is good magic partly because most people find the distinction between `[` and `[[` mystifying and `rowwise()` means that you don't need to think about it. Since `rowwise()` clearly is useful it is not longer questioning, and we expect it to be around for the long term. ### `do()` We've questioned the need for `do()` for quite some time, because it never felt very similar to the other dplyr verbs. It had two main modes of operation: * Without argument names: you could call functions that input and output data frames using `.` to refer to the "current" group. For example, the following code gets the first row of each group: ```{r} mtcars %>% group_by(cyl) %>% do(head(., 1)) ``` This has been superseded by `pick()` plus `reframe()`, a variant of `summarise()` that can create multiple rows and columns per group. ```{r} mtcars %>% group_by(cyl) %>% reframe(head(pick(everything()), 1)) ``` * With arguments: it worked like `mutate()` but automatically wrapped every element in a list: ```{r} mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) ``` I now believe that behaviour is both too magical and not very useful, and it can be replaced by `summarise()` and `pick()`. ```{r} mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(pick(everything()))) ``` If needed (unlike here), you can wrap the results in a list yourself. The addition of `pick()`/`across()` and the increased scope of `summarise()`/`reframe()` means that `do()` is no longer needed, so it is now superseded. 
dplyr/vignettes/window-functions.Rmd0000644000176200001440000002233614266276767017432 0ustar liggesusers--- title: "Window functions" description: > Window functions are a useful family of functions that work with vectors (returning an output the same size as the input), and combine naturally with `mutate()` and `filter()`. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Window functions} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) if (!rlang::is_installed("Lahman")) { knitr::opts_chunk$set(eval = FALSE) } ``` A __window function__ is a variation on an aggregation function. Where an aggregation function, like `sum()` and `mean()`, takes n inputs and return a single value, a window function returns n values. The output of a window function depends on all its input values, so window functions don't include functions that work element-wise, like `+` or `round()`. Window functions include variations on aggregate functions, like `cumsum()` and `cummean()`, functions for ranking and ordering, like `rank()`, and functions for taking offsets, like `lead()` and `lag()`. In this vignette, we'll use a small sample of the Lahman batting dataset, including the players that have won an award. ```{r} library(Lahman) batting <- Lahman::Batting %>% as_tibble() %>% select(playerID, yearID, teamID, G, AB:H) %>% arrange(playerID, yearID, teamID) %>% semi_join(Lahman::AwardsPlayers, by = "playerID") players <- batting %>% group_by(playerID) ``` Window functions are used in conjunction with `mutate()` and `filter()` to solve a wide range of problems. 
Here's a selection: ```{r, eval = FALSE} # For each player, find the two years with most hits filter(players, min_rank(desc(H)) <= 2 & H > 0) # Within each player, rank each year by the number of games played mutate(players, G_rank = min_rank(G)) # For each player, find every year that was better than the previous year filter(players, G > lag(G)) # For each player, compute avg change in games played per year mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID))) # For each player, find all years where they played more games than they did on average filter(players, G > mean(G)) # For each, player compute a z score based on number of games played mutate(players, G_z = (G - mean(G)) / sd(G)) ``` Before reading this vignette, you should be familiar with `mutate()` and `filter()`. ## Types of window functions There are five main families of window functions. Two families are unrelated to aggregation functions: * Ranking and ordering functions: `row_number()`, `min_rank()`, `dense_rank()`, `cume_dist()`, `percent_rank()`, and `ntile()`. These functions all take a vector to order by, and return various types of ranks. * Offsets `lead()` and `lag()` allow you to access the previous and next values in a vector, making it easy to compute differences and trends. The other three families are variations on familiar aggregate functions: * Cumulative aggregates: `cumsum()`, `cummin()`, `cummax()` (from base R), and `cumall()`, `cumany()`, and `cummean()` (from dplyr). * Rolling aggregates operate in a fixed width window. You won't find them in base R or in dplyr, but there are many implementations in other packages, such as [RcppRoll](https://cran.r-project.org/package=RcppRoll). * Recycled aggregates, where an aggregate is repeated to match the length of the input. These are not needed in R because vector recycling automatically recycles aggregates where needed. 
They are important in SQL, because the presence of an aggregation function usually tells the database to return only one row per group. Each family is described in more detail below, focussing on the general goals and how to use them with dplyr. For more details, refer to the individual function documentation. ## Ranking functions The ranking functions are variations on a theme, differing in how they handle ties: ```{r} x <- c(1, 1, 2, 2, 2) row_number(x) min_rank(x) dense_rank(x) ``` If you're familiar with R, you may recognise that `row_number()` and `min_rank()` can be computed with the base `rank()` function and various values of the `ties.method` argument. These functions are provided to save a little typing, and to make it easier to convert between R and SQL. Two other ranking functions return numbers between 0 and 1. `percent_rank()` gives the percentage of the rank; `cume_dist()` gives the proportion of values less than or equal to the current value. ```{r} cume_dist(x) percent_rank(x) ``` These are useful if you want to select (for example) the top 10% of records within each group. For example: ```{r} filter(players, cume_dist(desc(G)) < 0.1) ``` Finally, `ntile()` divides the data up into `n` evenly sized buckets. It's a coarse ranking, and it can be used in with `mutate()` to divide the data into buckets for further summary. For example, we could use `ntile()` to divide the players within a team into four ranked groups, and calculate the average number of games within each group. ```{r} by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) ``` All ranking functions rank from lowest to highest so that small input values get small ranks. Use `desc()` to rank from highest to lowest. 
## Lead and lag `lead()` and `lag()` produce offset versions of a input vector that is either ahead of or behind the original vector. ```{r} x <- 1:5 lead(x) lag(x) ``` You can use them to: * Compute differences or percent changes. ```{r, results = "hide"} # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) ``` Using `lag()` is more convenient than `diff()` because for `n` inputs `diff()` returns `n - 1` outputs. * Find out when a value changes. ```{r, results = "hide"} # Find when a player changed teams filter(players, teamID != lag(teamID)) ``` `lead()` and `lag()` have an optional argument `order_by`. If set, instead of using the row order to determine which value comes before another, they will use another variable. This is important if you have not already sorted the data, or you want to sort one way and lag another. Here's a simple example of what happens if you don't specify `order_by` when you need it: ```{r} df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) ``` ## Cumulative aggregates Base R provides cumulative sum (`cumsum()`), cumulative min (`cummin()`), and cumulative max (`cummax()`). (It also provides `cumprod()` but that is rarely useful). Other common accumulating functions are `cumany()` and `cumall()`, cumulative versions of `||` and `&&`, and `cummean()`, a cumulative mean. These are not included in base R, but efficient versions are provided by `dplyr`. `cumany()` and `cumall()` are useful for selecting all rows up to, or all rows after, a condition is true for the first (or last) time. 
For example, we can use `cumany()` to find all records for a player after they played a year with 150 games: ```{r, eval = FALSE} filter(players, cumany(G > 150)) ``` Like lead and lag, you may want to control the order in which the accumulation occurs. None of the built in functions have an `order_by` argument so `dplyr` provides a helper: `order_by()`. You give it the variable you want to order by, and then the call to the window function: ```{r} x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) ``` This function uses a bit of non-standard evaluation, so I wouldn't recommend using it inside another function; use the simpler but less concise `with_order()` instead. ## Recycled aggregates R's vector recycling makes it easy to select values that are higher or lower than a summary. I call this a recycled aggregate because the value of the aggregate is recycled to be the same length as the original vector. Recycled aggregates are useful if you want to find all records greater than the mean or less than the median: ```{r, eval = FALSE} filter(players, G > mean(G)) filter(players, G < median(G)) ``` While most SQL databases don't have an equivalent of `median()` or `quantile()`, when filtering you can achieve the same effect with `ntile()`. For example, `x > median(x)` is equivalent to `ntile(x, 2) == 2`; `x > quantile(x, 75)` is equivalent to `ntile(x, 100) > 75` or `ntile(x, 4) > 3`. ```{r, eval = FALSE} filter(players, ntile(G, 2) == 2) ``` You can also use this idea to select the records with the highest (`x == max(x)`) or lowest value (`x == min(x)`) for a field, but the ranking functions give you more control over ties, and allow you to select any number of records. Recycled aggregates are also useful in conjunction with `mutate()`. 
For example, with the batting data, we could compute the "career year", the number of years a player has played since they entered the league: ```{r} mutate(players, career_year = yearID - min(yearID) + 1) ``` Or, as in the introductory example, we could compute a z-score: ```{r} mutate(players, G_z = (G - mean(G)) / sd(G)) ``` dplyr/vignettes/base.Rmd0000644000176200001440000002744414406402754015013 0ustar liggesusers--- title: "dplyr <-> base R" output: rmarkdown::html_vignette description: > How does dplyr compare to base R? This vignette describes the main differences in philosophy, and shows the base R code most closely equivalent to each dplyr verb. vignette: > %\VignetteIndexEntry{dplyr <-> base R} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4, tibble.print_max = 4) ``` This vignette compares dplyr functions to their base R equivalents. This helps those familiar with base R understand better what dplyr does, and shows dplyr users how you might express the same ideas in base R code. We'll start with a rough overview of the major differences, then discuss the one table verbs in more detail, followed by the two table verbs. # Overview 1. The code dplyr verbs input and output data frames. This contrasts with base R functions which more frequently work with individual vectors. 1. dplyr relies heavily on "non-standard evaluation" so that you don't need to use `$` to refer to columns in the "current" data frame. This behaviour is inspired by the base functions `subset()` and `transform()`. 1. dplyr solutions tend to use a variety of single purpose verbs, while base R solutions typically tend to use `[` in a variety of ways, depending on the task at hand. 1. Multiple dplyr verbs are often strung together into a pipeline by `%>%`. 
In base R, you'll typically save intermediate results to a variable that you either discard, or repeatedly overwrite. 1. All dplyr verbs handle "grouped" data frames so that the code to perform a computation per-group looks very similar to code that works on a whole data frame. In base R, per-group operations tend to have varied forms. # One table verbs The following table shows a condensed translation between dplyr verbs and their base R equivalents. The following sections describe each operation in more detail. You'll learn more about the dplyr verbs in their documentation and in `vignette("dplyr")`. | dplyr | base | |------------------------------- |--------------------------------------------------| | `arrange(df, x)` | `df[order(x), , drop = FALSE]` | | `distinct(df, x)` | `df[!duplicated(x), , drop = FALSE]`, `unique()` | | `filter(df, x)` | `df[which(x), , drop = FALSE]`, `subset()` | | `mutate(df, z = x + y)` | `df$z <- df$x + df$y`, `transform()` | | `pull(df, 1)` | `df[[1]]` | | `pull(df, x)` | `df$x` | | `rename(df, y = x)` | `names(df)[names(df) == "x"] <- "y"` | | `relocate(df, y)` | `df[union("y", names(df))]` | | `select(df, x, y)` | `df[c("x", "y")]`, `subset()` | | `select(df, starts_with("x"))` | `df[grepl("^x", names(df))]` | | `summarise(df, mean(x))` | `mean(df$x)`, `tapply()`, `aggregate()`, `by()` | | `slice(df, c(1, 2, 5))` | `df[c(1, 2, 5), , drop = FALSE]` | To begin, we'll load dplyr and convert `mtcars` and `iris` to tibbles so that we can easily show only abbreviated output for each operation. 
```{r setup, message = FALSE} library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris) ``` ## `arrange()`: Arrange rows by variables `dplyr::arrange()` orders the rows of a data frame by the values of one or more columns: ```{r} mtcars %>% arrange(cyl, disp) ``` The `desc()` helper allows you to order selected variables in descending order: ```{r} mtcars %>% arrange(desc(cyl), desc(disp)) ``` We can replicate in base R by using `[` with `order()`: ```{r} mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] ``` Note the use of `drop = FALSE`. If you forget this, and the input is a data frame with a single column, the output will be a vector, not a data frame. This is a source of subtle bugs. Base R does not provide a convenient and general way to sort individual variables in descending order, so you have two options: * For numeric variables, you can use `-x`. * You can request `order()` to sort all variables in descending order. ```{r, results = FALSE} mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE] ``` ## `distinct()`: Select distinct/unique rows `dplyr::distinct()` selects unique rows: ```{r} df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns df %>% distinct(x, .keep_all = TRUE) # whole data frame ``` There are two equivalents in base R, depending on whether you want the whole data frame, or just selected variables: ```{r} unique(df["x"]) # selected columns df[!duplicated(df$x), , drop = FALSE] # whole data frame ``` ## `filter()`: Return rows with matching conditions `dplyr::filter()` selects rows where an expression is `TRUE`: ```{r} starwars %>% filter(species == "Human") starwars %>% filter(mass > 1000) starwars %>% filter(hair_color == "none" & eye_color == "black") ``` The closest base equivalent (and the inspiration for `filter()`) is `subset()`: ```{r} subset(starwars, species == "Human") 
subset(starwars, mass > 1000) subset(starwars, hair_color == "none" & eye_color == "black") ``` You can also use `[` but this also requires the use of `which()` to remove `NA`s: ```{r} starwars[which(starwars$species == "Human"), , drop = FALSE] starwars[which(starwars$mass > 1000), , drop = FALSE] starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE] ``` ## `mutate()`: Create or transform variables `dplyr::mutate()` creates new variables from existing variables: ```{r} df %>% mutate(z = x + y, z2 = z ^ 2) ``` The closest base equivalent is `transform()`, but note that it cannot use freshly created variables: ```{r} head(transform(df, z = x + y, z2 = (x + y) ^ 2)) ``` Alternatively, you can use `$<-`: ```{r} mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 ``` When applied to a grouped data frame, `dplyr::mutate()` computes new variable once per group: ```{r} gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% mutate(x_mean = mean(x), x_rank = rank(x)) ``` To replicate this in base R, you can use `ave()`: ```{r} transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) ``` ## `pull()`: Pull out a single variable `dplyr::pull()` extracts a variable either by name or position: ```{r} mtcars %>% pull(1) mtcars %>% pull(cyl) ``` This equivalent to `[[` for positions and `$` for names: ```{r} mtcars[[1]] mtcars$cyl ``` ## `relocate()`: Change column order `dplyr::relocate()` makes it easy to move a set of columns to a new position (by default, the front): ```{r} # to front mtcars %>% relocate(gear, carb) # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) ``` We can replicate this in base R with a little set manipulation: ```{r} mtcars[union(c("gear", "carb"), names(mtcars))] to_back <- c("mpg", "cyl") mtcars[c(setdiff(names(mtcars), to_back), to_back)] ``` Moving columns to somewhere in the middle requires a little more set twiddling. 
## `rename()`: Rename variables by name `dplyr::rename()` allows you to rename variables by name or position: ```{r} iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) ``` Renaming variables by position is straight forward in base R: ```{r} iris2 <- iris names(iris2)[2] <- "sepal_width" ``` Renaming variables by name requires a bit more work: ```{r} names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length" ``` ## `rename_with()`: Rename variables with a function `dplyr::rename_with()` transform column names with a function: ```{r} iris %>% rename_with(toupper) ``` A similar effect can be achieved with `setNames()` in base R: ```{r} setNames(iris, toupper(names(iris))) ``` ## `select()`: Select variables by name `dplyr::select()` subsets columns by position, name, function of name, or other property: ```{r} iris %>% select(1:3) iris %>% select(Species, Sepal.Length) iris %>% select(starts_with("Petal")) iris %>% select(where(is.factor)) ``` Subsetting variables by position is straightforward in base R: ```{r} iris[1:3] # single argument selects columns; never drops iris[1:3, , drop = FALSE] ``` You have two options to subset by name: ```{r} iris[c("Species", "Sepal.Length")] subset(iris, select = c(Species, Sepal.Length)) ``` Subsetting by function of name requires a bit of work with `grep()`: ```{r} iris[grep("^Petal", names(iris))] ``` And you can use `Filter()` to subset by type: ```{r} Filter(is.factor, iris) ``` ## `summarise()`: Reduce multiple values down to a single value `dplyr::summarise()` computes one or more summaries for each group: ```{r} mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) ``` I think the closest base R equivalent uses `by()`. 
Unfortunately `by()` returns a list of data frames, but you can combine them back together again with `do.call()` and `rbind()`: ```{r} mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) ``` `aggregate()` comes very close to providing an elegant answer: ```{r} agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg ``` But unfortunately while it looks like there are `disp.mean` and `disp.n` columns, it's actually a single matrix column: ```{r} str(agg) ``` You can see a variety of other options at . ## `slice()`: Choose rows by position `slice()` selects rows with their location: ```{r} slice(mtcars, 25:n()) ``` This is straightforward to replicate with `[`: ```{r} mtcars[25:nrow(mtcars), , drop = FALSE] ``` # Two-table verbs When we want to merge two data frames, `x` and `y`), we have a variety of different ways to bring them together. Various base R `merge()` calls are replaced by a variety of dplyr `join()` functions. | dplyr | base | |------------------------|-----------------------------------------| | `inner_join(df1, df2)` |`merge(df1, df2)` | | `left_join(df1, df2) ` |`merge(df1, df2, all.x = TRUE)` | | `right_join(df1, df2)` |`merge(df1, df2, all.y = TRUE)` | | `full_join(df1, df2)` |`merge(df1, df2, all = TRUE)` | | `semi_join(df1, df2)` |`df1[df1$x %in% df2$x, , drop = FALSE]` | | `anti_join(df1, df2)` |`df1[!df1$x %in% df2$x, , drop = FALSE]` | For more information about two-table verbs, see `vignette("two-table")`. ### Mutating joins dplyr's `inner_join()`, `left_join()`, `right_join()`, and `full_join()` add new columns from `y` to `x`, matching rows based on a set of "keys", and differ only in how missing matches are handled. They are equivalent to calls to `merge()` with various settings of the `all`, `all.x`, and `all.y` arguments. 
The main difference is the order of the rows: * dplyr preserves the order of the `x` data frame. * `merge()` sorts the key columns. ### Filtering joins dplyr's `semi_join()` and `anti_join()` affect only the rows, not the columns: ```{r} band_members %>% semi_join(band_instruments) band_members %>% anti_join(band_instruments) ``` They can be replicated in base R with `[` and `%in%`: ```{r} band_members[band_members$name %in% band_instruments$name, , drop = FALSE] band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] ``` Semi and anti joins with multiple key variables are considerably more challenging to implement. dplyr/vignettes/compatibility.html0000644000176200001440000007574413577426157017215 0ustar liggesusers dplyr compatibility

dplyr compatibility

This vignette is aimed at package authors who need to update their code because of a backward incompatible change to dplyr. We do try and minimise backward incompatible changes as much as possible, but sometimes they are necessary in order to radically simplify existing code, or unlock a lot of potential value in the future.

This vignette starts with some general advice on writing package code that works with multiple version of dplyr, then continues to discuss specific changes in dplyr versions.

Working with multiple dplyr versions

Ideally, you want to make sure that your package works with both the released version and the development version of dplyr. This is typically a little bit more work, but has two big advantages:

  1. It’s more convenient for your users, since they’re not forced to update dplyr if they don’t want to.

  2. It’s easier on CRAN since it doesn’t require a massive coordinated release of multiple packages.

To make code work with multiple versions of a package, your first tool is the simple if statement:

if (utils::packageVersion("dplyr") > "0.5.0") {
  # code for new version
} else {
  # code for old version
}

Always condition on > current-version, not >= next-version because this will ensure that this branch is also used for the development version of the package. For example, if the current release is version “0.5.0”, the development version will be “0.5.0.9000”.

Occasionally, you’ll run into a situation where the NAMESPACE has changed and you need to conditionally import different functions. This typically occurs when functions are moved from one package to another. We try our best to provide automatic fallbacks, but this is not always possible. Often you can work around the problem by avoiding importFrom and using :: instead. Do this where possible:

if (utils::packageVersion("dplyr") > "0.5.0") {
  dbplyr::build_sql(...)
} else {
  dplyr::build_sql(...)
}

This will generate an R CMD check NOTE (because one of the functions will always be missing), but this is ok. Simply explain that you get the note because you have written a wrapper to make sure your code is backward compatible.

Sometimes it’s not possible to avoid importFrom(). For example you might be importing a generic so that you can define a method for it. In this case, you can take advantage of a little-known feature in the NAMESPACE file: you can include if statements.

#' @rawNamespace
#' if (utils::packageVersion("dplyr") > "0.5.0") {
#'   importFrom("dbplyr", "build_sql")
#' } else {
#'   importFrom("dplyr", "build_sql")
#' }

dplyr 0.6.0

Database code moves to dbplyr

Almost all database related code has been moved out of dplyr and into a new package, dbplyr. This makes dplyr simpler, and will make it easier to release fixes for bugs that only affect databases. If you’ve implemented a database backend for dplyr, please read the backend news.

Depending on what generics you use, and what generics you provide methods for, you may need to write some conditional code. To help make this easier we’ve written wrap_dbplyr_obj() which will write the helper code for you:

wrap_dbplyr_obj("build_sql")

wrap_dbplyr_obj("base_agg")

Simply copy the results of this function in your package.

These will generate R CMD check NOTES, so make sure to tell CRAN that this is to ensure backward compatibility.

Deprecation of underscored verbs_()

Because the tidyeval framework allows us to combine SE and NSE semantics within the same functions, the underscored verbs have been softly deprecated.

For users of SE_ verbs

The legacy underscored versions take objects for which a lazyeval::as.lazy() method is defined. This includes symbols and calls, strings, and formulas. All of these objects have been replaced with quosures and you can call tidyeval verbs with unquoted quosures:

quo <- quo(cyl)
select(mtcars, !! quo)

Symbolic expressions are also supported, but note that bare symbols and calls do not carry scope information. If you’re referring to objects in the data frame, it’s safe to omit specifying an enclosure:

sym <- quote(cyl)
select(mtcars, !! sym)

call <- quote(mean(cyl))
summarise(mtcars, cyl = !! call)

Transforming objects into quosures is generally straightforward. To enclose with the current environment, you can unquote directly in quo() or you can use as_quosure():

quo(!! sym)
#> <quosure>
#> expr: ^cyl
#> env:  global
quo(!! call)
#> <quosure>
#> expr: ^mean(cyl)
#> env:  global

rlang::as_quosure(sym)
#> Warning: `as_quosure()` requires an explicit environment as of rlang 0.3.0.
#> Please supply `env`.
#> This warning is displayed once per session.
#> <quosure>
#> expr: ^cyl
#> env:  global
rlang::as_quosure(call)
#> <quosure>
#> expr: ^mean(cyl)
#> env:  global

Note that while formulas and quosures are very similar objects (and in the most general sense, formulas are quosures), they can’t be used interchangeably in tidyeval functions. Early implementations did treat bare formulas as quosures, but this created compatibility issues with modelling functions of the stats package. Fortunately, it’s easy to transform formulas to quosures that will self-evaluate in tidyeval functions:

f <- ~cyl
f
#> ~cyl
rlang::as_quosure(f)
#> <quosure>
#> expr: ^cyl
#> env:  global

Finally, and perhaps most importantly, strings are not and should not be parsed. As developers, it is tempting to try and solve problems using strings because we have been trained to work with strings rather than quoted expressions. However it’s almost always the wrong way to approach the problem. The exception is for creating symbols. In that case it is perfectly legitimate to use strings:

rlang::sym("cyl")
#> cyl
rlang::syms(letters[1:3])
#> [[1]]
#> a
#> 
#> [[2]]
#> b
#> 
#> [[3]]
#> c

But you should never use strings to create calls. Instead you can use quasiquotation:

syms <- rlang::syms(c("foo", "bar", "baz"))
quo(my_call(!!! syms))
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

fun <- rlang::sym("my_call")
quo((!!fun)(!!! syms))
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

Or create the call with call2():

call <- rlang::call2("my_call", !!! syms)
call
#> my_call(foo, bar, baz)

rlang::as_quosure(call)
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

# Or equivalently:
quo(!! rlang::call2("my_call", !!! syms))
#> <quosure>
#> expr: ^my_call(foo, bar, baz)
#> env:  global

Note that idioms based on interp() should now generally be avoided and replaced with quasiquotation. Where you used to interpolate:

lazyeval::interp(~ mean(var), var = rlang::sym("mpg"))

You would now unquote:

var <- "mpg"
quo(mean(!! rlang::sym(var)))

See also vignette("programming") for more about quasiquotation and quosures.

For package authors

For package authors, rlang provides a compatibility file that you can copy to your package. compat_lazy() and compat_lazy_dots() turn lazy-able objects into proper quosures. This helps providing an underscored version to your users for backward compatibility. For instance, here is how we defined the underscored version of filter() in dplyr 0.6:

filter_.tbl_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  filter(.data, !!! dots)
}

With tidyeval, S3 dispatch to the correct method might be an issue. In the past, the genericity of dplyr verbs was accomplished by dispatching in the underscored versions. Now that those are deprecated, we’ve turned the non-underscored verbs into S3 generics.

We maintain backward compatibility by redispatching to old underscored verbs in the default methods of the new S3 generics. For example, here is how we redispatch filter():

filter.default <- function(.data, ...) {
  filter_(.data, .dots = compat_as_lazy_dots(...))
}

This gets the job done in most cases. However, the default method will not be called for objects inheriting from one of the classes for which we provide non-underscored methods: data.frame, tbl_df, tbl_cube and grouped_df. An example of this is the sf package whose objects have classes c("sf", "data.frame"). Authors of such packages should provide a method for the non-underscored generic in order to be compatible with dplyr:

filter.sf <- function(.data, ...) {
  st_as_sf(NextMethod())
}

If you need help with this, please let us know!

Deprecation of mutate_each() and summarise_each()

These functions have been replaced by a more complete family of functions. This family has suffixes _if, _at and _all and includes more verbs than just mutate and summarise.

If you need to update your code to the new family, there are two relevant functions depending on which variables you apply funs() to. If you called mutate_each() without supplying a selection of variables, funs is applied to all variables. In this case, you should update your code to use mutate_all() instead:

mutate_each(starwars, funs(as.character))
mutate_all(starwars, funs(as.character))

Note that the new verbs support bare functions as well, so you don’t necessarily need to wrap with funs():

mutate_all(starwars, as.character)

On the other hand, if you supplied a variable selection, you should use mutate_at(). The variable selection should be wrapped with vars().

mutate_each(starwars, funs(as.character), height, mass)
mutate_at(starwars, vars(height, mass), as.character)

vars() supports all the selection helpers that you usually use with select():

summarise_at(mtcars, vars(starts_with("d")), mean)

Note that instead of a vars() selection, you can also supply character vectors of column names:

mutate_at(starwars, c("height", "mass"), as.character)
dplyr/vignettes/two-table.Rmd0000644000176200001440000001617414266276767016016 0ustar liggesusers--- title: "Two-table verbs" description: > Most dplyr verbs work with a single data set, but most data analyses involve multiple datasets. This vignette introduces you to the dplyr verbs that work with more one than data set, and introduces to the mutating joins, filtering joins, and the set operations. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Two-table verbs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 5) library(dplyr) ``` It's rare that a data analysis involves only a single table of data. In practice, you'll normally have many tables that contribute to an analysis, and you need flexible tools to combine them. In dplyr, there are three families of verbs that work with two tables at a time: * Mutating joins, which add new variables to one table from matching rows in another. * Filtering joins, which filter observations from one table based on whether or not they match an observation in the other table. * Set operations, which combine the observations in the data sets as if they were set elements. (This discussion assumes that you have [tidy data](https://www.jstatsoft.org/v59/i10/), where the rows are observations and the columns are variables. If you're not familiar with that framework, I'd recommend reading up on it first.) All two-table verbs work similarly. The first two arguments are `x` and `y`, and provide the tables to combine. The output is always a new table with the same type as `x`. ## Mutating joins Mutating joins allow you to combine variables from multiple tables. For example, consider the flights and airlines data from the nycflights13 package. In one table we have flight information with an abbreviation for carrier, and in another we have a mapping between abbreviations and full names. 
You can use a join to add the carrier names to the flight data: ```{r, warning = FALSE} library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) ``` ### Controlling how the tables are matched As well as `x` and `y`, each mutating join takes an argument `by` that controls which variables are used to match observations in the two tables. There are a few ways to specify it, as I illustrate below with various tables from nycflights13: * `NULL`, the default. dplyr will will use all variables that appear in both tables, a __natural__ join. For example, the flights and weather tables match on their common variables: year, month, day, hour and origin. ```{r} flights2 %>% left_join(weather) ``` * A character vector, `by = "x"`. Like a natural join, but uses only some of the common variables. For example, `flights` and `planes` have `year` columns, but they mean different things so we only want to join by `tailnum`. ```{r} flights2 %>% left_join(planes, by = "tailnum") ``` Note that the year columns in the output are disambiguated with a suffix. * A named character vector: `by = c("x" = "a")`. This will match variable `x` in table `x` to variable `a` in table `y`. The variables from use will be used in the output. Each flight has an origin and destination `airport`, so we need to specify which one we want to join to: ```{r} flights2 %>% left_join(airports, c("dest" = "faa")) flights2 %>% left_join(airports, c("origin" = "faa")) ``` ### Types of join There are four types of mutating join, which differ in their behaviour when a match is not found. We'll illustrate each with a simple example: ```{r} df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = "a") ``` * `inner_join(x, y)` only includes observations that match in both `x` and `y`. 
```{r} df1 %>% inner_join(df2) %>% knitr::kable() ``` * `left_join(x, y)` includes all observations in `x`, regardless of whether they match or not. This is the most commonly used join because it ensures that you don't lose observations from your primary table. ```{r} df1 %>% left_join(df2) ``` * `right_join(x, y)` includes all observations in `y`. It's equivalent to `left_join(y, x)`, but the columns and rows will be ordered differently. ```{r} df1 %>% right_join(df2) df2 %>% left_join(df1) ``` * `full_join()` includes all observations from `x` and `y`. ```{r} df1 %>% full_join(df2) ``` The left, right and full joins are collectively know as __outer joins__. When a row doesn't match in an outer join, the new variables are filled in with missing values. ### Observations While mutating joins are primarily used to add new variables, they can also generate new observations. If a match is not unique, a join will add all possible combinations (the Cartesian product) of the matching observations: ```{r} df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) df1 %>% left_join(df2) ``` ## Filtering joins Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. There are two types: * `semi_join(x, y)` __keeps__ all observations in `x` that have a match in `y`. * `anti_join(x, y)` __drops__ all observations in `x` that have a match in `y`. These are most useful for diagnosing join mismatches. For example, there are many flights in the nycflights13 dataset that don't have a matching tail number in the planes table: ```{r} library("nycflights13") flights %>% anti_join(planes, by = "tailnum") %>% count(tailnum, sort = TRUE) ``` If you're worried about what observations your joins will match, start with a `semi_join()` or `anti_join()`. `semi_join()` and `anti_join()` never duplicate; they only ever remove observations. 
```{r} df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) # Four rows to start with: df1 %>% nrow() # And we get four rows after the join df1 %>% inner_join(df2, by = "x") %>% nrow() # But only two rows actually match df1 %>% semi_join(df2, by = "x") %>% nrow() ``` ## Set operations The final type of two-table verb is set operations. These expect the `x` and `y` inputs to have the same variables, and treat the observations like sets: * `intersect(x, y)`: return only observations in both `x` and `y` * `union(x, y)`: return unique observations in `x` and `y` * `setdiff(x, y)`: return observations in `x`, but not in `y`. Given this simple data: ```{r} (df1 <- tibble(x = 1:2, y = c(1L, 1L))) (df2 <- tibble(x = 1:2, y = 1:2)) ``` The four possibilities are: ```{r} intersect(df1, df2) # Note that we get 3 rows, not 4 union(df1, df2) setdiff(df1, df2) setdiff(df2, df1) ``` ## Multiple-table verbs dplyr does not provide any functions for working with three or more tables. Instead use `purrr::reduce()` or `Reduce()`, as described in [Advanced R](http://adv-r.had.co.nz/Functionals.html#functionals-fp), to iteratively combine the two-table verbs to handle as many tables as you need. dplyr/vignettes/dplyr.Rmd0000644000176200001440000003304114366556340015227 0ustar liggesusers--- title: "Introduction to dplyr" output: rmarkdown::html_vignette description: > Start here if this is your first time using dplyr. You'll learn the basic philosophy, the most important data manipulation verbs, and the pipe, `%>%`, which allows you to combine multiple verbs together to solve real problems. 
vignette: > %\VignetteIndexEntry{Introduction to dplyr} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) ``` When working with data you must: * Figure out what you want to do. * Describe those tasks in the form of a computer program. * Execute the program. The dplyr package makes these steps fast and easy: * By constraining your options, it helps you think about your data manipulation challenges. * It provides simple "verbs", functions that correspond to the most common data manipulation tasks, to help you translate your thoughts into code. * It uses efficient backends, so you spend less time waiting for the computer. This document introduces you to dplyr's basic set of tools, and shows you how to apply them to data frames. dplyr also supports databases via the dbplyr package, once you've installed, read `vignette("dbplyr")` to learn more. ## Data: starwars To explore the basic data manipulation verbs of dplyr, we'll use the dataset `starwars`. This dataset contains `r nrow(starwars)` characters and comes from the [Star Wars API](https://swapi.dev), and is documented in `?starwars` ```{r} dim(starwars) starwars ``` Note that `starwars` is a tibble, a modern reimagining of the data frame. It's particularly useful for large datasets because it only prints the first few rows. You can learn more about tibbles at ; in particular you can convert data frames to tibbles with `as_tibble()`. ## Single table verbs dplyr aims to provide a function for each basic verb of data manipulation. These verbs can be organised into three categories based on the component of the dataset that they work with: * Rows: * `filter()` chooses rows based on column values. * `slice()` chooses rows based on location. * `arrange()` changes the order of the rows. 
* Columns: * `select()` changes whether or not a column is included. * `rename()` changes the name of columns. * `mutate()` changes the values of columns and creates new columns. * `relocate()` changes the order of the columns. * Groups of rows: * `summarise()` collapses a group into a single row. ### The pipe All of the dplyr functions take a data frame (or tibble) as the first argument. Rather than forcing the user to either save intermediate objects or nest functions, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so the result from one step is then "piped" into the next step. You can use the pipe to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"). ### Filter rows with `filter()` `filter()` allows you to select a subset of rows in a data frame. Like all single verbs, the first argument is the tibble (or data frame). The second and subsequent arguments refer to variables within that data frame, selecting rows where the expression is `TRUE`. For example, we can select all character with light skin color and brown eyes with: ```{r} starwars %>% filter(skin_color == "light", eye_color == "brown") ``` This is roughly equivalent to this base R code: ```{r, eval = FALSE} starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ] ``` ### Arrange rows with `arrange()` `arrange()` works similarly to `filter()` except that instead of filtering or selecting rows, it reorders them. It takes a data frame, and a set of column names (or more complicated expressions) to order by. If you provide more than one column name, each additional column will be used to break ties in the values of preceding columns: ```{r} starwars %>% arrange(height, mass) ``` Use `desc()` to order a column in descending order: ```{r} starwars %>% arrange(desc(height)) ``` ### Choose rows using their position with `slice()` `slice()` lets you index rows by their (integer) locations. 
It allows you to select, remove, and duplicate rows. We can get characters from row numbers 5 through 10. ```{r} starwars %>% slice(5:10) ``` It is accompanied by a number of helpers for common use cases: * `slice_head()` and `slice_tail()` select the first or last rows. ```{r} starwars %>% slice_head(n = 3) ``` * `slice_sample()` randomly selects rows. Use the option prop to choose a certain proportion of the cases. ```{r} starwars %>% slice_sample(n = 5) starwars %>% slice_sample(prop = 0.1) ``` Use `replace = TRUE` to perform a bootstrap sample. If needed, you can weight the sample with the `weight` argument. * `slice_min()` and `slice_max()` select rows with highest or lowest values of a variable. Note that we first must choose only the values which are not NA. ```{r} starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) ``` ### Select columns with `select()` Often you work with large datasets with many columns but only a few are actually of interest to you. `select()` allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions: ```{r} # Select columns by name starwars %>% select(hair_color, skin_color, eye_color) # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) # Select all columns ending with color starwars %>% select(ends_with("color")) ``` There are a number of helper functions you can use within `select()`, like `starts_with()`, `ends_with()`, `matches()` and `contains()`. These let you quickly match larger blocks of variables that meet some criterion. See `?select` for more details. You can rename variables with `select()` by using named arguments: ```{r} starwars %>% select(home_world = homeworld) ``` But because `select()` drops all the variables not explicitly mentioned, it's not that useful. 
Instead, use `rename()`: ```{r} starwars %>% rename(home_world = homeworld) ``` ### Add new columns with `mutate()` Besides selecting sets of existing columns, it's often useful to add new columns that are functions of existing columns. This is the job of `mutate()`: ```{r} starwars %>% mutate(height_m = height / 100) ``` We can't see the height in meters we just calculated, but we can fix that using a select command. ```{r} starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) ``` `dplyr::mutate()` is similar to the base `transform()`, but allows you to refer to columns that you've just created: ```{r} starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) ``` If you only want to keep the new variables, use `.keep = "none"`: ```{r} starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2), .keep = "none" ) ``` ### Change column order with `relocate()` Use a similar syntax as `select()` to move blocks of columns at once ```{r} starwars %>% relocate(sex:homeworld, .before = height) ``` ### Summarise values with `summarise()` The last verb is `summarise()`. It collapses a data frame to a single row. ```{r} starwars %>% summarise(height = mean(height, na.rm = TRUE)) ``` It's not that useful until we learn the `group_by()` verb below. ### Commonalities You may have noticed that the syntax and function of all these verbs are very similar: * The first argument is a data frame. * The subsequent arguments describe what to do with the data frame. You can refer to columns in the data frame directly without using `$`. * The result is a new data frame Together these properties make it easy to chain together multiple simple steps to achieve a complex result. These five functions provide the basis of a language of data manipulation. 
At the most basic level, you can only alter a tidy data frame in five useful ways: you can reorder the rows (`arrange()`), pick observations and variables of interest (`filter()` and `select()`), add new variables that are functions of existing variables (`mutate()`), or collapse many values to a summary (`summarise()`). ## Combining functions with `%>%` The dplyr API is functional in the sense that function calls don't have side-effects. You must always save their results. This doesn't lead to particularly elegant code, especially if you want to do many operations at once. You either have to do it step-by-step: ```{r, eval = FALSE} a1 <- group_by(starwars, species, sex) a2 <- select(a1, height, mass) a3 <- summarise(a2, height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` Or if you don't want to name the intermediate results, you need to wrap the function calls inside each other: ```{r} summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` This is difficult to read because the order of the operations is from inside to out. Thus, the arguments are a long way away from the function. To get around this problem, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"): ```{r, eval = FALSE} starwars %>% group_by(species, sex) %>% select(height, mass) %>% summarise( height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` ## Patterns of operations The dplyr verbs can be classified by the type of operations they accomplish (we sometimes speak of their **semantics**, i.e., their meaning). It's helpful to have a good grasp of the difference between select and mutate operations. 
### Selecting operations One of the appealing features of dplyr is that you can refer to columns from the tibble as if they were regular variables. However, the syntactic uniformity of referring to bare column names hides semantical differences across the verbs. A column symbol supplied to `select()` does not have the same meaning as the same symbol supplied to `mutate()`. Selecting operations expect column names and positions. Hence, when you call `select()` with bare variable names, they actually represent their own positions in the tibble. The following calls are completely equivalent from dplyr's point of view: ```{r} # `name` represents the integer 1 select(starwars, name) select(starwars, 1) ``` By the same token, this means that you cannot refer to variables from the surrounding context if they have the same name as one of the columns. In the following example, `height` still represents 2, not 5: ```{r} height <- 5 select(starwars, height) ``` One useful subtlety is that this only applies to bare names and to selecting calls like `c(height, mass)` or `height:mass`. In all other cases, the columns of the data frame are not put in scope. This allows you to refer to contextual variables in selection helpers: ```{r} name <- "color" select(starwars, ends_with(name)) ``` These semantics are usually intuitive. But note the subtle difference: ```{r} name <- 5 select(starwars, name, identity(name)) ``` In the first argument, `name` represents its own position `1`. In the second argument, `name` is evaluated in the surrounding context and represents the fifth column. For a long time, `select()` used to only understand column positions. Counting from dplyr 0.6, it now understands column names as well. This makes it a bit easier to program with `select()`: ```{r} vars <- c("name", "height") select(starwars, all_of(vars), "mass") ``` ### Mutating operations Mutate semantics are quite different from selection semantics. 
Whereas `select()` expects column names or positions, `mutate()` expects *column vectors*. We will set up a smaller tibble to use for our examples. ```{r} df <- starwars %>% select(name, height, mass) ``` When we use `select()`, the bare column names stand for their own positions in the tibble. For `mutate()` on the other hand, column symbols represent the actual column vectors stored in the tibble. Consider what happens if we give a string or a number to `mutate()`: ```{r} mutate(df, "height", 2) ``` `mutate()` gets length-1 vectors that it interprets as new columns in the data frame. These vectors are recycled so they match the number of rows. That's why it doesn't make sense to supply expressions like `"height" + 10` to `mutate()`. This amounts to adding 10 to a string! The correct expression is: ```{r} mutate(df, height + 10) ``` In the same way, you can unquote values from the context if these values represent a valid column. They must be either length 1 (they then get recycled) or have the same length as the number of rows. In the following example we create a new vector that we add to the data frame: ```{r} var <- seq(1, nrow(df)) mutate(df, new = var) ``` A case in point is `group_by()`. While you might think it has select semantics, it actually has mutate semantics. This is quite handy as it allows to group by a modified column: ```{r} group_by(starwars, sex) group_by(starwars, sex = as.factor(sex)) group_by(starwars, height_binned = cut(height, 3)) ``` This is why you can't supply a column name to `group_by()`. This amounts to creating a new column containing the string recycled to the number of rows: ```{r} group_by(df, "month") ``` dplyr/vignettes/programming.Rmd0000644000176200001440000003536214406402754016421 0ustar liggesusers--- title: "Programming with dplyr" description: > Most dplyr verbs use "tidy evaluation", a special type of non-standard evaluation. 
In this vignette, you'll learn the two basic forms, data masking and tidy selection, and how you can program with them using either functions or for loops. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Programming with dplyr} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` ## Introduction Most dplyr verbs use **tidy evaluation** in some way. Tidy evaluation is a special type of non-standard evaluation used throughout the tidyverse. There are two basic forms found in dplyr: - `arrange()`, `count()`, `filter()`, `group_by()`, `mutate()`, and `summarise()` use **data masking** so that you can use data variables as if they were variables in the environment (i.e. you write `my_variable` not `df$my_variable`). - `across()`, `relocate()`, `rename()`, `select()`, and `pull()` use **tidy selection** so you can easily choose variables based on their position, name, or type (e.g. `starts_with("x")` or `is.numeric`). To determine whether a function argument uses data masking or tidy selection, look at the documentation: in the arguments list, you'll see `<data-masking>` or `<tidy-select>`. Data masking and tidy selection make interactive data exploration fast and fluid, but they add some new challenges when you attempt to use them indirectly such as in a for loop or a function. This vignette shows you how to overcome those challenges. We'll first go over the basics of data masking and tidy selection, talk about how to use them indirectly, and then show you a number of recipes to solve common problems. This vignette will give you the minimum knowledge you need to be an effective programmer with tidy evaluation. 
If you'd like to learn more about the underlying theory, or precisely how it's different from non-standard evaluation, we recommend that you read the Metaprogramming chapters in [*Advanced R*](https://adv-r.hadley.nz). ```{r setup, message = FALSE} library(dplyr) ``` ## Data masking Data masking makes data manipulation faster because it requires less typing. In most (but not all[^1]) base R functions you need to refer to variables with `$`, leading to code that repeats the name of the data frame many times: [^1]: dplyr's `filter()` is inspired by base R's `subset()`. `subset()` provides data masking, but not with tidy evaluation, so the techniques described in this chapter don't apply to it. ```{r, results = FALSE} starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,] ``` The dplyr equivalent of this code is more concise because data masking allows you to need to type `starwars` once: ```{r, results = FALSE} starwars %>% filter(homeworld == "Naboo", species == "Human") ``` ### Data- and env-variables The key idea behind data masking is that it blurs the line between the two different meanings of the word "variable": - **env-variables** are "programming" variables that live in an environment. They are usually created with `<-`. - **data-variables** are "statistical" variables that live in a data frame. They usually come from data files (e.g. `.csv`, `.xls`), or are created manipulating existing variables. To make those definitions a little more concrete, take this piece of code: ```{r} df <- data.frame(x = runif(3), y = runif(3)) df$x ``` It creates a env-variable, `df`, that contains two data-variables, `x` and `y`. Then it extracts the data-variable `x` out of the env-variable `df` using `$`. I think this blurring of the meaning of "variable" is a really nice feature for interactive data analysis because it allows you to refer to data-vars as is, without any prefix. 
And this seems to be fairly intuitive since many newer R users will attempt to write `diamonds[x == 0 | y == 0, ]`. Unfortunately, this benefit does not come for free. When you start to program with these tools, you're going to have to grapple with the distinction. This will be hard because you've never had to think about it before, so it'll take a while for your brain to learn these new concepts and categories. However, once you've teased apart the idea of "variable" into data-variable and env-variable, I think you'll find it fairly straightforward to use. ### Indirection The main challenge of programming with functions that use data masking arises when you introduce some indirection, i.e. when you want to get the data-variable from an env-variable instead of directly typing the data-variable's name. There are two main cases: - When you have the data-variable in a function argument (i.e. an env-variable that holds a promise[^2]), you need to **embrace** the argument by surrounding it in doubled braces, like `filter(df, {{ var }})`. The following function uses embracing to create a wrapper around `summarise()` that computes the minimum and maximum values of a variable, as well as the number of observations that were summarised: ```{r, results = FALSE} var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) ``` - When you have an env-variable that is a character vector, you need to index into the `.data` pronoun with `[[`, like `summarise(df, mean = mean(.data[[var]]))`. The following example uses `.data` to count the number of unique values in each variable of `mtcars`: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` Note that `.data` is not a data frame; it's a special construct, a pronoun, that allows you to access the current variables either directly, with `.data$x` or indirectly with `.data[[var]]`. 
Don't expect other functions to work with it. [^2]: In R, arguments are lazily evaluated which means that until you attempt to use, they don't hold a value, just a **promise** that describes how to compute the value. You can learn more at ### Name injection Many data masking functions also use dynamic dots, which gives you another useful feature: generating names programmatically by using `:=` instead of `=`. There are two basics forms, as illustrated below with `tibble()`: - If you have the name in an env-variable, you can use glue syntax to interpolate in: ```{r} name <- "susan" tibble("{name}" := 2) ``` - If the name should be derived from a data-variable in an argument, you can use embracing syntax: ```{r} my_df <- function(x) { tibble("{{x}}_2" := x * 2) } my_var <- 10 my_df(my_var) ``` Learn more in `` ?rlang::`dyn-dots` ``. ## Tidy selection Data masking makes it easy to compute on values within a dataset. Tidy selection is a complementary tool that makes it easy to work with the columns of a dataset. ### The tidyselect DSL Underneath all functions that use tidy selection is the [tidyselect](https://tidyselect.r-lib.org/) package. It provides a miniature domain specific language that makes it easy to select columns by name, position, or type. For example: - `select(df, 1)` selects the first column; `select(df, last_col())` selects the last column. - `select(df, c(a, b, c))` selects columns `a`, `b`, and `c`. - `select(df, starts_with("a"))` selects all columns whose name starts with "a"; `select(df, ends_with("z"))` selects all columns whose name ends with "z". - `select(df, where(is.numeric))` selects all numeric columns. You can see more details in `?dplyr_tidy_select`. ### Indirection As with data masking, tidy selection makes a common task easier at the cost of making a less common task harder. When you want to use tidy select indirectly with the column specification stored in an intermediate variable, you'll need to learn some new tools. 
Again, there are two forms of indirection: - When you have the data-variable in an env-variable that is a function argument, you use the same technique as data masking: you **embrace** the argument by surrounding it in doubled braces. The following function summarises a data frame by computing the mean of all variables selected by the user: ```{r, results = FALSE} summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) ``` - When you have an env-variable that is a character vector, you need to use `all_of()` or `any_of()` depending on whether you want the function to error if a variable is not found. The following code uses `all_of()` to select all of the variables found in a character vector; then `!` plus `all_of()` to select all of the variables *not* found in a character vector: ```{r, results = FALSE} vars <- c("mpg", "vs") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars)) ``` ## How-tos The following examples solve a grab bag of common problems. We show you the minimum amount of code so that you can get the basic idea; most real problems will require more code or combining multiple techniques. ### User-supplied data If you check the documentation, you'll see that `.data` never uses data masking or tidy select. 
That means you don't need to do anything special in your function: ```{r} mutate_y <- function(data) { mutate(data, y = a + x) } ``` ### One or more user-supplied expressions If you want the user to supply an expression that's passed onto an argument which uses data masking or tidy select, embrace the argument: ```{r} my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } ``` This generalises in a straightforward way if you want to use one user-supplied expression in multiple places: ```{r} my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } ``` If you want the user to provide multiple expressions, embrace each of them: ```{r} my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } ``` If you want to use the name of a variable in the output, you can embrace the variable name on the left-hand side of `:=` with `{{`: ```{r} my_summarise4 <- function(data, expr) { data %>% summarise( "mean_{{expr}}" := mean({{ expr }}), "sum_{{expr}}" := sum({{ expr }}), "n_{{expr}}" := n() ) } my_summarise5 <- function(data, mean_var, sd_var) { data %>% summarise( "mean_{{mean_var}}" := mean({{ mean_var }}), "sd_{{sd_var}}" := sd({{ sd_var }}) ) } ``` ### Any number of user-supplied expressions If you want to take an arbitrary number of user supplied expressions, use `...`. This is most often useful when you want to give the user full control over a single part of the pipeline, like a `group_by()` or a `mutate()`. ```{r} my_summarise <- function(.data, ...) { .data %>% group_by(...) %>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) starwars %>% my_summarise(sex, gender) ``` When you use `...` in this way, make sure that any other arguments start with `.` to reduce the chances of argument clashes; see for more details. 
### Creating multiple columns Sometimes it can be useful for a single expression to return multiple columns. You can do this by returning an unnamed data frame: ```{r} quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs), quant = probs ) } x <- 1:5 quantile_df(x) ``` This sort of function is useful inside `summarise()` and `mutate()` which allow you to add multiple columns by returning a data frame: ```{r} df <- tibble( grp = rep(1:3, each = 10), x = runif(30), y = rnorm(30) ) df %>% group_by(grp) %>% summarise(quantile_df(x, probs = .5)) df %>% group_by(grp) %>% summarise(across(x:y, ~ quantile_df(.x, probs = .5), .unpack = TRUE)) ``` Notice that we set `.unpack = TRUE` inside `across()`. This tells `across()` to _unpack_ the data frame returned by `quantile_df()` into its respective columns, combining the column names of the original columns (`x` and `y`) with the column names returned from the function (`val` and `quant`). If your function returns multiple _rows_ per group, then you'll need to switch from `summarise()` to `reframe()`. `summarise()` is restricted to returning 1 row summaries per group, but `reframe()` lifts this restriction: ```{r} df %>% group_by(grp) %>% reframe(across(x:y, quantile_df, .unpack = TRUE)) ``` ### Transforming user-supplied variables If you want the user to provide a set of data-variables that are then transformed, use `across()` and `pick()`: ```{r} my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) ``` You can use this same idea for multiple sets of input data-variables: ```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } ``` Use the `.names` argument to `across()` to control the names of the output. 
```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}")) } ``` ### Loop over multiple variables If you have a character vector of variable names, and want to operate on them with a for loop, index into the special `.data` pronoun: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` This same technique works with for loop alternatives like the base R `apply()` family and the purrr `map()` family: ```{r, results = FALSE} mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]])) ``` (Note that the `x` in `.data[[x]]` is always treated as an env-variable; it will never come from the data.) ### Use a variable from an Shiny input Many Shiny input controls return character vectors, so you can use the same approach as above: `.data[[input$var]]`. ```{r, eval = FALSE} library(shiny) ui <- fluidPage( selectInput("var", "Variable", choices = names(diamonds)), tableOutput("output") ) server <- function(input, output, session) { data <- reactive(filter(diamonds, .data[[input$var]] > 0)) output$output <- renderTable(head(data())) } ``` See for more details and case studies. dplyr/vignettes/compatibility.R0000644000176200001440000000712413577426157016435 0ustar liggesusers## ----setup, include = FALSE--------------------------------------------------- library(dplyr) knitr::opts_chunk$set(collapse = T, comment = "#>") ## ---- results = "hide"-------------------------------------------------------- if (utils::packageVersion("dplyr") > "0.5.0") { # code for new version } else { # code for old version } ## ---- eval = FALSE------------------------------------------------------------ # if (utils::packageVersion("dplyr") > "0.5.0") { # dbplyr::build_sql(...) # } else { # dplyr::build_sql(...) 
# } ## ----------------------------------------------------------------------------- #' @rawNamespace #' if (utils::packageVersion("dplyr") > "0.5.0") { #' importFrom("dbplyr", "build_sql") #' } else { #' importFrom("dplyr", "build_sql") #' } ## ---- eval = FALSE------------------------------------------------------------ # wrap_dbplyr_obj("build_sql") # # wrap_dbplyr_obj("base_agg") ## ---- eval = FALSE------------------------------------------------------------ # quo <- quo(cyl) # select(mtcars, !! quo) ## ---- results = "hide"-------------------------------------------------------- sym <- quote(cyl) select(mtcars, !! sym) call <- quote(mean(cyl)) summarise(mtcars, cyl = !! call) ## ----------------------------------------------------------------------------- quo(!! sym) quo(!! call) rlang::as_quosure(sym) rlang::as_quosure(call) ## ----------------------------------------------------------------------------- f <- ~cyl f rlang::as_quosure(f) ## ----------------------------------------------------------------------------- rlang::sym("cyl") rlang::syms(letters[1:3]) ## ----------------------------------------------------------------------------- syms <- rlang::syms(c("foo", "bar", "baz")) quo(my_call(!!! syms)) fun <- rlang::sym("my_call") quo((!!fun)(!!! syms)) ## ----------------------------------------------------------------------------- call <- rlang::call2("my_call", !!! syms) call rlang::as_quosure(call) # Or equivalently: quo(!! rlang::call2("my_call", !!! syms)) ## ---- eval=FALSE-------------------------------------------------------------- # lazyeval::interp(~ mean(var), var = rlang::sym("mpg")) ## ---- eval=FALSE-------------------------------------------------------------- # var <- "mpg" # quo(mean(!! rlang::sym(var))) ## ---- eval = FALSE------------------------------------------------------------ # filter_.tbl_df <- function(.data, ..., .dots = list()) { # dots <- compat_lazy_dots(.dots, caller_env(), ...) # filter(.data, !!! 
dots) # } ## ---- eval = FALSE------------------------------------------------------------ # filter.default <- function(.data, ...) { # filter_(.data, .dots = compat_as_lazy_dots(...)) # } ## ---- eval = FALSE------------------------------------------------------------ # filter.sf <- function(.data, ...) { # st_as_sf(NextMethod()) # } ## ---- eval = FALSE------------------------------------------------------------ # mutate_each(starwars, funs(as.character)) # mutate_all(starwars, funs(as.character)) ## ---- eval = FALSE------------------------------------------------------------ # mutate_all(starwars, as.character) ## ---- eval = FALSE------------------------------------------------------------ # mutate_each(starwars, funs(as.character), height, mass) # mutate_at(starwars, vars(height, mass), as.character) ## ---- eval = FALSE------------------------------------------------------------ # summarise_at(mtcars, vars(starts_with("d")), mean) ## ---- eval = FALSE------------------------------------------------------------ # mutate_at(starwars, c("height", "mass"), as.character) dplyr/vignettes/in-packages.Rmd0000644000176200001440000002006414406402754016252 0ustar liggesusers--- title: "Using dplyr in packages" description: > A guide for package authors who use dplyr. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Using dplyr in packages} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, message = FALSE} library(dplyr) ``` This vignette is aimed at package authors who use dplyr in their packages. We will discuss best practices learned over the years to avoid `R CMD check` notes and warnings, and how to handle when dplyr deprecates functions. 
## Join helpers As of dplyr 1.1.0, we've introduced `join_by()` along with 4 helpers for performing various types of joins: - `closest()` - `between()` - `within()` - `overlaps()` `join_by()` implements a domain specific language (DSL) for joins, and internally interprets calls to these functions. You'll notice that `dplyr::closest()` isn't an exported function from dplyr (`dplyr::between()` and `base::within()` do happen to be preexisting functions). If you use `closest()` in your package, then this will cause an `R CMD check` note letting you know that you've used a symbol that doesn't belong to any package. To silence this, place `utils::globalVariables("closest")` in a source file in your package (but outside of any function). dbplyr does a similar thing for SQL functions, so you can see an example of that [here](https://github.com/tidyverse/dbplyr/blob/7edf5d607fd6b0b897721ea96d1c9ca9401f0f9b/R/backend-redshift.R#L144). You may also have to add utils to your package Imports, even though it is a base package. You can do that easily with `usethis::use_package("utils")`. ## Data masking and tidy selection NOTEs If you're writing a package and you have a function that uses data masking or tidy selection: ```{r} my_summary_function <- function(data) { data %>% select(grp, x, y) %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } ``` You'll get a `NOTE` because `R CMD check` doesn't know that dplyr functions use tidy evaluation: N checking R code for possible problems my_summary_function: no visible binding for global variable ‘grp’, ‘x’, ‘y’ Undefined global functions or variables: grp x y To eliminate this note: - For data masking, import `.data` from [rlang](https://rlang.r-lib.org/) and then use `.data$var` instead of `var`. - For tidy selection, use `"var"` instead of `var`. 
That yields: ```{r} #' @importFrom rlang .data my_summary_function <- function(data) { data %>% select("grp", "x", "y") %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) } ``` For more about programming with dplyr, see `vignette("programming", package = "dplyr")`. ## Deprecation This section is focused on updating package code to deal with backwards incompatible changes in dplyr. We do try and minimize backward incompatible changes as much as possible, but sometimes they are necessary in order to radically simplify existing code, or unlock a lot of potential value in the future. We will start with some general advice about supporting multiple versions of dplyr at once, and then we will discuss some specific changes in dplyr. ### Multiple dplyr versions Ideally, when we introduce a breaking change you'll want to make sure that your package works with both the released version and the development version of dplyr. This is typically a little bit more work, but has two big advantages: - It's more convenient for your users, since your package will work for them regardless of what version of dplyr they have installed. - It's easier on CRAN since it doesn't require a massive coordinated release of multiple packages. If we break your package, we will typically send you a pull request that implements a patch before releasing the next version of dplyr. Most of the time, this patch will be backwards compatible with older versions of dplyr as well. Ideally, you'll accept this patch and submit a new version of your package to CRAN before the new version of dplyr is released. 
To make code work with multiple versions of a package, your first tool is the simple if statement: ```{r, eval=FALSE} if (utils::packageVersion("dplyr") > "0.5.0") { # code for new version } else { # code for old version } ``` Always condition on `> current-version`, not `>= next-version` because this will ensure that this branch is also used for the development version of the package. For example, if the current release is version `"0.5.0"`, the development version will be `"0.5.0.9000"`. This typically works well if the branch for the "new version" introduces a new argument or has a slightly different return value. This *doesn't* work if we've introduced a new function that you need to switch to, like: ```{r, eval=FALSE} if (utils::packageVersion("dplyr") > "1.0.10") { dplyr::reframe(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } ``` In this case, when checks are run with dplyr 1.0.10 you'll get a warning about using a function from dplyr that doesn't exist (`reframe()`) even though that branch will never run. You can get around this by using `utils::getFromNamespace()` to indirectly call the new dplyr function: ```{r, eval=FALSE} if (utils::packageVersion("dplyr") > "1.0.10") { utils::getFromNamespace("reframe", "dplyr")(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } ``` As soon as the next version of dplyr is actually on CRAN (1.1.0 in this case), you should feel free to remove this code and unconditionally use `reframe()` as long as you also require `dplyr (>= 1.1.0)` in your `DESCRIPTION` file. This is typically not very painful for users, because they'd already be updating your package when they run into this requirement, so updating one more package along the way is generally easy. It also helps them get the latest bug fixes and features from dplyr. Sometimes, it isn't possible to avoid a call to `@importFrom`. 
For example you might be importing a generic so that you can define a method for it, but that generic has moved between packages. In this case, you can take advantage of a little-known feature in the `NAMESPACE` file: you can include raw `if` statements. ```{r, eval=FALSE} #' @rawNamespace #' if (utils::packageVersion("dplyr") > "0.5.0") { #' importFrom("dbplyr", "build_sql") #' } else { #' importFrom("dplyr", "build_sql") #' } ``` ### Deprecation of `mutate_*()` and `summarise_*()` The following `mutate()` and `summarise()` variants were deprecated in dplyr 0.7.0: - `mutate_each()`, `summarise_each()` and the following variants were superseded in dplyr 1.0.0: - `mutate_all()`, `summarise_all()` - `mutate_if()`, `summarise_if()` - `mutate_at()`, `summarise_at()` These have all been replaced by using `mutate()` or `summarise()` in combination with `across()`, which was introduced in dplyr 1.0.0. If you used `mutate_all()` or `mutate_each()` without supplying a selection, you should update to use `across(everything())`: ```{r, eval=FALSE} starwars %>% mutate_each(funs(as.character)) starwars %>% mutate_all(funs(as.character)) starwars %>% mutate(across(everything(), as.character)) ``` If you provided a selection through `mutate_at()` or `mutate_each()`, then you can switch to `across()` with a selection: ```{r, eval = FALSE} starwars %>% mutate_each(funs(as.character), height, mass) starwars %>% mutate_at(vars(height, mass), as.character) starwars %>% mutate(across(c(height, mass), as.character)) ``` If you used predicates with `mutate_if()`, you can switch to using `across()` in combination with `where()`: ```{r, eval=FALSE} starwars %>% mutate_if(is.factor, as.character) starwars %>% mutate(across(where(is.factor), as.character)) ``` ## Data frame subclasses If you are a package author that is *extending* dplyr to work with a new data frame subclass, then we encourage you to read the documentation in `?dplyr_extending`. 
This contains advice on how to implement the minimal number of extension generics possible to get maximal compatibility across dplyr's verbs.
dplyr/vignettes/grouping.Rmd0000644000176200001440000001637514366556340015742 0ustar liggesusers---
title: "Grouped data"
description: >
  To unlock the full potential of dplyr, you need to understand how
  each verb interacts with grouping. This vignette shows you how to
  manipulate grouping, how each verb changes its behaviour when working
  with grouped data, and how you can access data about the "current"
  group from within a verb.
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Grouped data}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r, echo = FALSE, message = FALSE, warning = FALSE}
knitr::opts_chunk$set(collapse = T, comment = "#>")
options(tibble.print_min = 4L, tibble.print_max = 4L)
```

dplyr verbs are particularly powerful when you apply them to grouped data frames (`grouped_df` objects).
This vignette shows you:

* How to group, inspect, and ungroup with `group_by()` and friends.

* How individual dplyr verbs change their behaviour when applied to a grouped data frame.

* How to access data about the "current" group from within a verb.

We'll start by loading dplyr:

```{r, message = FALSE}
library(dplyr)
```

## `group_by()`

The most important grouping verb is `group_by()`: it takes a data frame and one or more variables to group by:

```{r}
by_species <- starwars %>% group_by(species)
by_sex_gender <- starwars %>% group_by(sex, gender)
```

You can see the grouping when you print the data:

```{r}
by_species
by_sex_gender
```

Or use `tally()` to count the number of rows in each group.
The `sort` argument is useful if you want to see the largest groups up front.

```{r}
by_species %>% tally()
by_sex_gender %>% tally(sort = TRUE)
```

As well as grouping by existing variables, you can group by any function of existing variables.
This is equivalent to performing a `mutate()` **before** the `group_by()`:

```{r group_by_with_expression}
bmi_breaks <- c(0, 18.5, 25, 30, Inf)

starwars %>%
  group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>%
  tally()
```

## Group metadata

You can see underlying group data with `group_keys()`.
It has one row for each group and one column for each grouping variable:

```{r group_vars}
by_species %>% group_keys()
by_sex_gender %>% group_keys()
```

You can see which group each row belongs to with `group_indices()`:

```{r}
by_species %>% group_indices()
```

And which rows each group contains with `group_rows()`:

```{r}
by_species %>% group_rows() %>% head()
```

Use `group_vars()` if you just want the names of the grouping variables:

```{r}
by_species %>% group_vars()
by_sex_gender %>% group_vars()
```

### Changing and adding to grouping variables

If you apply `group_by()` to an already grouped dataset, it will overwrite the existing grouping variables.
For example, the following code groups by `homeworld` instead of `species`:

```{r}
by_species %>%
  group_by(homeworld) %>%
  tally()
```

To **augment** the grouping, use `.add = TRUE`[^add].
For example, the following code groups by species and homeworld:

```{r}
by_species %>%
  group_by(homeworld, .add = TRUE) %>%
  tally()
```

[^add]: Note that the argument changed from `add = TRUE` to `.add = TRUE` in dplyr 1.0.0.

### Removing grouping variables

To remove all grouping variables, use `ungroup()`:

```{r}
by_species %>%
  ungroup() %>%
  tally()
```

You can also choose to selectively ungroup by listing the variables you want to remove:

```{r}
by_sex_gender %>%
  ungroup(sex) %>%
  tally()
```

## Verbs

The following sections describe how grouping affects the main dplyr verbs.

### `summarise()`

`summarise()` computes a summary for each group.
This means that it starts from `group_keys()`, adding summary variables to the right hand side:

```{r summarise}
by_species %>%
  summarise(
    n = n(),
    height = mean(height, na.rm = TRUE)
  )
```

The `.groups=` argument controls the grouping structure of the output.
The historical behaviour of removing the right hand side grouping variable corresponds to `.groups = "drop_last"` without a message or `.groups = NULL` with a message (the default).

```{r}
by_sex_gender %>%
  summarise(n = n()) %>%
  group_vars()

by_sex_gender %>%
  summarise(n = n(), .groups = "drop_last") %>%
  group_vars()
```

Since version 1.0.0 the groups may also be kept (`.groups = "keep"`) or dropped (`.groups = "drop"`).

```{r}
by_sex_gender %>%
  summarise(n = n(), .groups = "keep") %>%
  group_vars()

by_sex_gender %>%
  summarise(n = n(), .groups = "drop") %>%
  group_vars()
```

When the output no longer has grouping variables, it becomes ungrouped (i.e. a regular tibble).

### `select()`, `rename()`, and `relocate()`

`rename()` and `relocate()` behave identically with grouped and ungrouped data because they only affect the name or position of existing columns.
Grouped `select()` is almost identical to ungrouped select, except that it always includes the grouping variables:

```{r select}
by_species %>% select(mass)
```

If you don't want the grouping variables, you'll have to first `ungroup()`.
(This design is possibly a mistake, but we're stuck with it for now.)

### `arrange()`

Grouped `arrange()` is the same as ungrouped `arrange()`, unless you set `.by_group = TRUE`, in which case it will order first by the grouping variables.

```{r}
by_species %>%
  arrange(desc(mass)) %>%
  relocate(species, mass)

by_species %>%
  arrange(desc(mass), .by_group = TRUE) %>%
  relocate(species, mass)
```

Note that the second example is sorted by `species` (from the `group_by()` statement) and then by `mass` (within species).

### `mutate()`

In simple cases with vectorised functions, grouped and ungrouped `mutate()` give the same results.
They differ when used with summary functions: ```{r by_homeworld} # Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) ``` Or with window functions like `min_rank()`: ```{r} # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) ``` ### `filter()` A grouped `filter()` effectively does a `mutate()` to generate a logical variable, and then only keeps the rows where the variable is `TRUE`. This means that grouped filters can be used with summary functions. For example, we can find the tallest character of each species: ```{r filter} by_species %>% select(name, species, height) %>% filter(height == max(height)) ``` You can also use `filter()` to remove entire groups. For example, the following code eliminates all groups that only have a single member: ```{r filter_group} by_species %>% filter(n() != 1) %>% tally() ``` ### `slice()` and friends `slice()` and friends (`slice_head()`, `slice_tail()`, `slice_sample()`, `slice_min()` and `slice_max()`) select rows within a group. For example, we can select the first observation within each species: ```{r slice} by_species %>% relocate(species) %>% slice(1) ``` Similarly, we can use `slice_min()` to select the smallest `n` values of a variable: ```{r slice_min} by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) ``` dplyr/vignettes/colwise.Rmd0000644000176200001440000003007314366556340015544 0ustar liggesusers--- title: "Column-wise operations" description: > Learn how to easily repeat the same operation across multiple columns using `across()`. 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Column-wise operations} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` It's often useful to perform the same operation on multiple columns, but copying and pasting is both tedious and error prone: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d)) ``` (If you're trying to compute `mean(a, b, c, d)` for each row, instead see `vignette("rowwise")`) This vignette will introduce you to the `across()` function, which lets you rewrite the previous code more succinctly: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(across(a:d, mean)) ``` We'll start by discussing the basic usage of `across()`, particularly as it applies to `summarise()`, and show how to use it with multiple functions. We'll then show a few uses with other verbs. We'll finish off with a bit of history, showing why we prefer `across()` to our last approach (the `_if()`, `_at()` and `_all()` functions) and how to translate your old code to the new syntax. ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ## Basic usage `across()` has two primary arguments: * The first argument, `.cols`, selects the columns you want to operate on. It uses tidy selection (like `select()`) so you can pick variables by position, name, and type. * The second argument, `.fns`, is a function or list of functions to apply to each column. This can also be a purrr style formula (or list of formulas) like `~ .x / 2`. (This argument is optional, and you can omit it if you just want to get the underlying data; you'll see that technique used in `vignette("rowwise")`.) Here are a couple of examples of `across()` in conjunction with its favourite verb, `summarise()`. 
But you can use `across()` with any dplyr verb, as you'll see a little later. ```{r} starwars %>% summarise(across(where(is.character), n_distinct)) starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) ``` Because `across()` is usually used in combination with `summarise()` and `mutate()`, it doesn't select grouping variables in order to avoid accidentally modifying them: ```{r} df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) ``` ### Multiple functions You can transform each variable with more than one function by supplying a named list of functions or lambda functions in the second argument: ```{r} min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) starwars %>% summarise(across(c(height, mass, birth_year), min_max)) ``` Control how the names are created with the `.names` argument which takes a [glue](https://glue.tidyverse.org/) spec: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}")) ``` If you'd prefer all summaries with the same function to be grouped together, you'll have to expand the calls yourself: ```{r} starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ``` (One day this might become an argument to `across()` but we're not yet sure how it would work.) We cannot however use `where(is.numeric)` in that last case because the second `across()` would pick up the variables that were newly created ("min_height", "min_mass" and "min_birth_year"). 
We can work around this by combining both calls to `across()` into a single expression that returns a tibble: ```{r} starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ) ``` Alternatively we could reorganize results with `relocate()`: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% relocate(starts_with("min")) ``` ### Current column If you need to, you can access the name of the "current" column inside by calling `cur_column()`. This can be useful if you want to perform some sort of context dependent transformation that's already encoded in a vector: ```{r} df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) ``` ### Gotchas Be careful when combining numeric summaries with `where(is.numeric)`: ```{r} df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) ``` Here `n` becomes `NA` because `n` is numeric, so the `across()` computes its standard deviation, and the standard deviation of 3 (a constant) is `NA`. 
You probably want to compute `n()` last to avoid this problem: ```{r} df %>% summarise(across(where(is.numeric), sd), n = n()) ``` Alternatively, you could explicitly exclude `n` from the columns to operate on: ```{r} df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) ``` Another approach is to combine both the call to `n()` and `across()` in a single expression that returns a tibble: ```{r} df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) ``` ### Other verbs So far we've focused on the use of `across()` with `summarise()`, but it works with any other dplyr verb that uses data masking: * Rescale all numeric variables to range 0-1: ```{r} rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) ``` For some verbs, like `group_by()`, `count()` and `distinct()`, you don't need to supply a summary function, but it can be useful to use tidy-selection to dynamically select a set of columns. In those cases, we recommend using the complement to `across()`, `pick()`, which works like `across()` but doesn't apply any functions and instead returns a data frame containing the selected columns. * Find all distinct ```{r} starwars %>% distinct(pick(contains("color"))) ``` * Count all combinations of variables with a given pattern: ```{r} starwars %>% count(pick(contains("color")), sort = TRUE) ``` `across()` doesn't work with `select()` or `rename()` because they already use tidy select syntax; if you want to transform column names with a function, you can use `rename_with()`. ### filter() We cannot directly use `across()` in `filter()` because we need an extra step to combine the results. 
To that end, `filter()` has two special purpose companion functions: * `if_any()` keeps the rows where the predicate is true for *at least one* selected column: ```{r} starwars %>% filter(if_any(everything(), ~ !is.na(.x))) ``` * `if_all()` keeps the rows where the predicate is true for *all* selected columns: ```{r} starwars %>% filter(if_all(everything(), ~ !is.na(.x))) ``` ## `_if`, `_at`, `_all` Prior versions of dplyr allowed you to apply a function to multiple columns in a different way: using functions with `_if`, `_at`, and `_all()` suffixes. These functions solved a pressing need and are used by many people, but are now superseded. That means that they'll stay around, but won't receive any new features and will only get critical bug fixes. ### Why do we like `across()`? Why did we decide to move away from these functions in favour of `across()`? 1. `across()` makes it possible to express useful summaries that were previously impossible: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise( across(where(is.numeric), mean), across(where(is.factor), nlevels), n = n(), ) ``` 1. `across()` reduces the number of functions that dplyr needs to provide. This makes dplyr easier for you to use (because there are fewer functions to remember) and easier for us to implement new verbs (since we only need to implement one function, not four). 1. `across()` unifies `_if` and `_at` semantics so that you can select by position, name, and type, and you can now create compound selections that were previously impossible. For example, you can now transform all numeric columns whose name begins with "x": `across(where(is.numeric) & starts_with("x"))`. 1. `across()` doesn't need to use `vars()`. The `_at()` functions are the only place in dplyr where you have to manually quote variable names, which makes them a little weird and hence harder to remember. ### Why did it take so long to discover `across()`? 
It's disappointing that we didn't discover `across()` earlier, and instead worked through several false starts (first not realising that it was a common problem, then with the `_each()` functions, and most recently with the `_if()`/`_at()`/`_all()` functions). But `across()` couldn't work without three recent discoveries: * You can have a column of a data frame that is itself a data frame. This is something provided by base R, but it's not very well documented, and it took a while to see that it was useful, not just a theoretical curiosity. * We can use data frames to allow summary functions to return multiple columns. * We can use the absence of an outer name as a convention that you want to unpack a data frame column into individual columns. ### How do you convert existing code? Fortunately, it's generally straightforward to translate your existing code to use `across()`: * Strip the `_if()`, `_at()` and `_all()` suffix off the function. * Call `across()`. The first argument will be: 1. For `_if()`, the old second argument wrapped in `where()`. 1. For `_at()`, the old second argument, with the call to `vars()` removed. 1. For `_all()`, `everything()`. The subsequent arguments can be copied as is. For example: ```{r, results = FALSE} df %>% mutate_if(is.numeric, ~mean(.x, na.rm = TRUE)) # -> df %>% mutate(across(where(is.numeric), ~mean(.x, na.rm = TRUE))) df %>% mutate_at(vars(c(x, starts_with("y"))), mean) # -> df %>% mutate(across(c(x, starts_with("y")), mean)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) ``` There are a few exceptions to this rule: * `rename_*()` and `select_*()` follow a different pattern. They already have select semantics, so are generally used in a different way that doesn't have a direct equivalent with `across()`; use the new `rename_with()` instead. * Previously, `filter_*()` were paired with the `all_vars()` and `any_vars()` helpers. 
The new helpers `if_any()` and `if_all()` can be used inside `filter()` to keep rows for which the predicate is true for at least one, or all selected columns:

```{r}
df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1))

# Find all rows where EVERY numeric variable is greater than zero
df %>% filter(if_all(where(is.numeric), ~ .x > 0))

# Find all rows where ANY numeric variable is greater than zero
df %>% filter(if_any(where(is.numeric), ~ .x > 0))
```

* When used in a `mutate()`, all transformations performed by an `across()` are applied at once.
  This is different to the behaviour of `mutate_if()`, `mutate_at()`, and `mutate_all()`, which apply the transformations one at a time.
  We expect that you'll generally find the new behaviour less surprising:

```{r}
df <- tibble(x = 2, y = 4, z = 8)
df %>% mutate_all(~ .x / y)

df %>% mutate(across(everything(), ~ .x / y))
```
dplyr/R/0000755000176200001440000000000014525503021011602 5ustar liggesusersdplyr/R/nth-value.R0000644000176200001440000001127414406402754013646 0ustar liggesusers#' Extract the first, last, or nth value from a vector
#'
#' These are useful helpers for extracting a single value from a vector. They
#' are guaranteed to return a meaningful value, even when the input is shorter
#' than expected. You can also provide an optional secondary vector that defines
#' the ordering.
#'
#' @details
#' For most vector types, `first(x)`, `last(x)`, and `nth(x, n)` work like
#' `x[[1]]`, `x[[length(x)]]`, and `x[[n]]`, respectively. The primary exception
#' is data frames, where they instead retrieve rows, i.e. `x[1, ]`, `x[nrow(x),
#' ]`, and `x[n, ]`. This is consistent with the tidyverse/vctrs principle which
#' treats data frames as a vector of rows, rather than a vector of columns.
#'
#' @param x A vector
#' @param n For `nth()`, a single integer specifying the position.
#'   Negative integers index from the end (i.e. `-1L` will return the
#'   last value in the vector).
#' @param order_by An optional vector the same size as `x` used to determine the
#'   order.
#' @param default A default value to use if the position does not exist in `x`.
#'
#'   If `NULL`, the default, a missing value is used.
#'
#'   If supplied, this must be a single value, which will be cast to the type of
#'   `x`.
#'
#'   When `x` is a list, `default` is allowed to be any value. There are no
#'   type or size restrictions in this case.
#' @param na_rm Should missing values in `x` be removed before extracting the
#'   value?
#'
#' @return
#' If `x` is a list, a single element from that list. Otherwise, a vector the
#' same type as `x` with size 1.
#'
#' @export
#' @examples
#' x <- 1:10
#' y <- 10:1
#'
#' first(x)
#' last(y)
#'
#' nth(x, 1)
#' nth(x, 5)
#' nth(x, -2)
#'
#' # `first()` and `last()` are often useful in `summarise()`
#' df <- tibble(x = x, y = y)
#' df %>%
#'   summarise(
#'     across(x:y, first, .names = "{col}_first"),
#'     y_last = last(y)
#'   )
#'
#' # Selecting a position that is out of bounds returns a default value
#' nth(x, 11)
#' nth(x, 0)
#'
#' # This out of bounds behavior also applies to empty vectors
#' first(integer())
#'
#' # You can customize the default value with `default`
#' nth(x, 11, default = -1L)
#' first(integer(), default = 0L)
#'
#' # `order_by` provides optional ordering
#' last(x)
#' last(x, order_by = y)
#'
#' # `na_rm` removes missing values before extracting the value
#' z <- c(NA, NA, 1, 3, NA, 5, NA)
#' first(z)
#' first(z, na_rm = TRUE)
#' last(z, na_rm = TRUE)
#' nth(z, 3, na_rm = TRUE)
#'
#' # For data frames, these select entire rows
#' df <- tibble(a = 1:5, b = 6:10)
#' first(df)
#' nth(df, 4)
nth <- function(x, n, order_by = NULL, default = NULL, na_rm = FALSE) {
  size <- vec_size(x)

  # `n` must be a single value castable to integer
  vec_check_size(n, size = 1L)
  n <- vec_cast(n, to = integer())

  # `order_by`, when supplied, must pair 1:1 with the elements of `x`
  if (!is.null(order_by)) {
    vec_check_size(order_by, size = size)
  }

  default <- check_nth_default(default, x = x)
  check_bool(na_rm)

  # Drop missing elements (and their ordering keys) before indexing
  if (na_rm && vec_any_missing(x)) {
    not_missing <-
!vec_detect_missing(x)
    x <- vec_slice(x, not_missing)
    size <- vec_size(x)

    if (!is.null(order_by)) {
      order_by <- vec_slice(order_by, not_missing)
    }
  }

  if (is.na(n)) {
    abort("`n` can't be `NA`.")
  }

  if (n < 0L) {
    # Negative values index from RHS
    n <- size + n + 1L
  }

  # Out-of-bounds positions (including 0) fall back to `default`
  if (n <= 0L || n > size) {
    return(default)
  }

  # Translate the requested position through the `order_by` ordering
  if (!is.null(order_by)) {
    order <- vec_order_radix(order_by)
    n <- order[[n]]
  }

  vec_slice2(x, n)
}

# `first()` is `nth()` at position 1
#' @export
#' @rdname nth
first <- function(x, order_by = NULL, default = NULL, na_rm = FALSE) {
  nth(x, 1L, order_by = order_by, default = default, na_rm = na_rm)
}

# `last()` is `nth()` at position -1 (i.e. indexed from the end)
#' @export
#' @rdname nth
last <- function(x, order_by = NULL, default = NULL, na_rm = FALSE) {
  nth(x, -1L, order_by = order_by, default = default, na_rm = na_rm)
}

# Validates and casts `default` for `nth()`; the right fallback depends on
# whether `x` is a list (where any `default` is allowed) or a vector (where
# `default` must be a size-1 value castable to the type of `x`).
check_nth_default <- function(default, x, ..., error_call = caller_env()) {
  check_dots_empty0(...)

  if (obj_is_list(x)) {
    # Very special behavior for lists, since we use `[[` on them.
    # Valid to use any `default` here (even non-vectors).
    # And `default = NULL` is the correct default `default` for lists.
    return(default)
  }

  if (is.null(default)) {
    return(vec_init(x))
  }

  vec_check_size(default, size = 1L, call = error_call)

  default <- vec_cast(
    x = default,
    to = x,
    x_arg = "default",
    to_arg = "x",
    call = error_call
  )

  default
}

vec_slice2 <- function(x, i) {
  # Our unimplemented vctrs equivalent of `[[`
  # https://github.com/r-lib/vctrs/pull/1228/

  # A real implementation would use this, but it is too slow right now
  # and we know `i` is a valid integer index (#6682)
  # i <- vec_as_location2(i, vec_size(x))

  if (obj_is_list(x)) {
    out <- .subset2(x, i)
  } else {
    out <- vec_slice(x, i)
    out <- vec_set_names(out, NULL)
  }

  out
}
dplyr/R/rows.R0000644000176200001440000004533114406402754012736 0ustar liggesusers#' Manipulate individual rows
#'
#' @description
#'
#' These functions provide a framework for modifying rows in a table using a
#' second table of data. 
The two tables are matched `by` a set of key variables
#' whose values typically uniquely identify each row. The functions are inspired
#' by SQL's `INSERT`, `UPDATE`, and `DELETE`, and can optionally modify
#' `in_place` for selected backends.
#'
#' * `rows_insert()` adds new rows (like `INSERT`). By default, key values in
#'   `y` must not exist in `x`.
#' * `rows_append()` works like `rows_insert()` but ignores keys.
#' * `rows_update()` modifies existing rows (like `UPDATE`). Key values in `y`
#'   must be unique, and, by default, key values in `y` must exist in `x`.
#' * `rows_patch()` works like `rows_update()` but only overwrites `NA` values.
#' * `rows_upsert()` inserts or updates depending on whether or not the
#'   key value in `y` already exists in `x`. Key values in `y` must be unique.
#' * `rows_delete()` deletes rows (like `DELETE`). By default, key values in `y`
#'   must exist in `x`.
#'
#' @section Methods:
#' These functions are **generic**s, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' Methods available in currently loaded packages:
#'
#' * `rows_insert()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_insert")}.
#' * `rows_append()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_append")}.
#' * `rows_update()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_update")}.
#' * `rows_patch()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_patch")}.
#' * `rows_upsert()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_upsert")}.
#' * `rows_delete()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rows_delete")}.
#'
#' @inheritParams left_join
#' @param x,y A pair of data frames or data frame extensions (e.g. a tibble).
#'   `y` must have the same columns as `x` or a subset.
#' @param by An unnamed character vector giving the key columns.
The key columns #' must exist in both `x` and `y`. Keys typically uniquely identify each row, #' but this is only enforced for the key values of `y` when `rows_update()`, #' `rows_patch()`, or `rows_upsert()` are used. #' #' By default, we use the first column in `y`, since the first column is #' a reasonable place to put an identifier variable. #' @param in_place Should `x` be modified in place? This argument is only #' relevant for mutable backends (e.g. databases, data.tables). #' #' When `TRUE`, a modified version of `x` is returned invisibly; #' when `FALSE`, a new object representing the resulting changes is returned. #' @param conflict For `rows_insert()`, how should keys in `y` that conflict #' with keys in `x` be handled? A conflict arises if there is a key in `y` #' that already exists in `x`. #' #' One of: #' - `"error"`, the default, will error if there are any keys in `y` that #' conflict with keys in `x`. #' - `"ignore"` will ignore rows in `y` with keys that conflict with keys in #' `x`. #' @param unmatched For `rows_update()`, `rows_patch()`, and `rows_delete()`, #' how should keys in `y` that are unmatched by the keys in `x` be handled? #' #' One of: #' - `"error"`, the default, will error if there are any keys in `y` that #' are unmatched by the keys in `x`. #' - `"ignore"` will ignore rows in `y` with keys that are unmatched by the #' keys in `x`. #' @returns #' An object of the same type as `x`. The order of the rows and columns of `x` #' is preserved as much as possible. The output has the following properties: #' #' * `rows_update()` and `rows_patch()` preserve the number of rows; #' `rows_insert()`, `rows_append()`, and `rows_upsert()` return all existing #' rows and potentially new rows; `rows_delete()` returns a subset of the #' rows. #' * Columns are not added, removed, or relocated, though the data may be #' updated. #' * Groups are taken from `x`. #' * Data frame attributes are taken from `x`. 
#' #' If `in_place = TRUE`, the result will be returned invisibly. #' @name rows #' @examples #' data <- tibble(a = 1:3, b = letters[c(1:2, NA)], c = 0.5 + 0:2) #' data #' #' # Insert #' rows_insert(data, tibble(a = 4, b = "z")) #' #' # By default, if a key in `y` matches a key in `x`, then it can't be inserted #' # and will throw an error. Alternatively, you can ignore rows in `y` #' # containing keys that conflict with keys in `x` with `conflict = "ignore"`, #' # or you can use `rows_append()` to ignore keys entirely. #' try(rows_insert(data, tibble(a = 3, b = "z"))) #' rows_insert(data, tibble(a = 3, b = "z"), conflict = "ignore") #' rows_append(data, tibble(a = 3, b = "z")) #' #' # Update #' rows_update(data, tibble(a = 2:3, b = "z")) #' rows_update(data, tibble(b = "z", a = 2:3), by = "a") #' #' # Variants: patch and upsert #' rows_patch(data, tibble(a = 2:3, b = "z")) #' rows_upsert(data, tibble(a = 2:4, b = "z")) #' #' # Delete and truncate #' rows_delete(data, tibble(a = 2:3)) #' rows_delete(data, tibble(a = 2:3, b = "b")) #' #' # By default, for update, patch, and delete it is an error if a key in `y` #' # doesn't exist in `x`. You can ignore rows in `y` that have unmatched keys #' # with `unmatched = "ignore"`. 
#' y <- tibble(a = 3:4, b = "z")
#' try(rows_update(data, y, by = "a"))
#' rows_update(data, y, by = "a", unmatched = "ignore")
#' rows_patch(data, y, by = "a", unmatched = "ignore")
#' rows_delete(data, y, by = "a", unmatched = "ignore")
NULL

#' @rdname rows
#' @export
rows_insert <- function(x,
                        y,
                        by = NULL,
                        ...,
                        conflict = c("error", "ignore"),
                        copy = FALSE,
                        in_place = FALSE) {
  UseMethod("rows_insert")
}

#' @export
rows_insert.data.frame <- function(x,
                                   y,
                                   by = NULL,
                                   ...,
                                   conflict = c("error", "ignore"),
                                   copy = FALSE,
                                   in_place = FALSE) {
  check_dots_empty()
  # Data frames can't be modified in place; `in_place = TRUE` errors here
  rows_df_in_place(in_place)

  y <- auto_copy(x, y, copy = copy)

  # Validate the key columns: they must exist in both tables, and the
  # columns of `y` must be a subset of the columns of `x`
  by <- rows_check_by(by, y)
  rows_check_x_contains_y(x, y)
  rows_check_contains_by(x, by, "x")
  rows_check_contains_by(y, by, "y")

  # Cast `y` to the column types of `x` before binding
  y <- rows_cast_y(y, x)

  x_key <- dplyr_col_select(x, by)
  y_key <- dplyr_col_select(y, by)

  # Handle key conflicts per `conflict`; a non-NULL `keep` selects the
  # non-conflicting rows of `y` to retain
  keep <- rows_check_y_conflict(x_key, y_key, conflict)

  if (!is.null(keep)) {
    y <- dplyr_row_slice(y, keep)
  }

  rows_bind(x, y)
}

#' @rdname rows
#' @export
rows_append <- function(x, y, ..., copy = FALSE, in_place = FALSE) {
  UseMethod("rows_append")
}

#' @export
rows_append.data.frame <- function(x, y, ..., copy = FALSE, in_place = FALSE) {
  check_dots_empty()
  rows_df_in_place(in_place)

  y <- auto_copy(x, y, copy = copy)

  # Unlike `rows_insert()`, no key handling: just check columns, cast, bind
  rows_check_x_contains_y(x, y)
  y <- rows_cast_y(y, x)

  rows_bind(x, y)
}

#' @rdname rows
#' @export
rows_update <- function(x,
                        y,
                        by = NULL,
                        ...,
                        unmatched = c("error", "ignore"),
                        copy = FALSE,
                        in_place = FALSE) {
  UseMethod("rows_update", x)
}

#' @export
rows_update.data.frame <- function(x,
                                   y,
                                   by = NULL,
                                   ...,
                                   unmatched = c("error", "ignore"),
                                   copy = FALSE,
                                   in_place = FALSE) {
  check_dots_empty()
  rows_df_in_place(in_place)

  y <- auto_copy(x, y, copy = copy)

  by <- rows_check_by(by, y)
  rows_check_x_contains_y(x, y)
  rows_check_contains_by(x, by, "x")
  rows_check_contains_by(y, by, "y")

  x_key <- dplyr_col_select(x, by)
  y_key <- dplyr_col_select(y, by)

  # Key values of `y` must uniquely identify rows for an update
  rows_check_unique(y_key, "y")

  # Cast both key sets to their common type before matching
  args <- vec_cast_common(x = x_key, y = y_key)
  x_key <-
args$x
  y_key <- args$y

  # Split `y` into key columns (above) and value columns (below)
  values_names <- setdiff(names(y), names(y_key))
  x_values <- dplyr_col_select(x, values_names)
  y_values <- dplyr_col_select(y, values_names)
  y_values <- rows_cast_y(y_values, x_values)

  # Handle unmatched keys per `unmatched`; a non-NULL `keep` selects the
  # matched rows of `y` to retain
  keep <- rows_check_y_unmatched(x_key, y_key, unmatched)

  if (!is.null(keep)) {
    y_key <- dplyr_row_slice(y_key, keep)
    y_values <- dplyr_row_slice(y_values, keep)
  }

  # Map each row of `x` to its matching row of `y` (NA = no match)
  loc <- vec_match(x_key, y_key)
  match <- !is.na(loc)

  y_loc <- loc[match]
  x_loc <- which(match)

  # Overwrite matched rows of the value columns with the rows from `y`
  y_values <- dplyr_row_slice(y_values, y_loc)
  x_values <- vec_assign(x_values, x_loc, y_values)
  x_values <- dplyr_new_list(x_values)

  x <- dplyr_col_modify(x, x_values)

  x
}

#' @rdname rows
#' @export
rows_patch <- function(x,
                       y,
                       by = NULL,
                       ...,
                       unmatched = c("error", "ignore"),
                       copy = FALSE,
                       in_place = FALSE) {
  UseMethod("rows_patch", x)
}

#' @export
rows_patch.data.frame <- function(x,
                                  y,
                                  by = NULL,
                                  ...,
                                  unmatched = c("error", "ignore"),
                                  copy = FALSE,
                                  in_place = FALSE) {
  check_dots_empty()
  rows_df_in_place(in_place)

  y <- auto_copy(x, y, copy = copy)

  by <- rows_check_by(by, y)
  rows_check_x_contains_y(x, y)
  rows_check_contains_by(x, by, "x")
  rows_check_contains_by(y, by, "y")

  x_key <- dplyr_col_select(x, by)
  y_key <- dplyr_col_select(y, by)

  rows_check_unique(y_key, "y")

  args <- vec_cast_common(x = x_key, y = y_key)
  x_key <- args$x
  y_key <- args$y

  values_names <- setdiff(names(y), names(y_key))
  x_values <- dplyr_col_select(x, values_names)
  y_values <- dplyr_col_select(y, values_names)
  y_values <- rows_cast_y(y_values, x_values)

  keep <- rows_check_y_unmatched(x_key, y_key, unmatched)

  if (!is.null(keep)) {
    y_key <- dplyr_row_slice(y_key, keep)
    y_values <- dplyr_row_slice(y_values, keep)
  }

  loc <- vec_match(x_key, y_key)
  match <- !is.na(loc)

  y_loc <- loc[match]
  x_loc <- which(match)

  # Patch semantics: column-by-column `coalesce()` keeps existing non-NA
  # values in `x` and only fills its missing values from `y`
  x_slice <- dplyr_row_slice(x_values, x_loc)
  x_slice <- dplyr_new_list(x_slice)
  y_slice <- dplyr_row_slice(y_values, y_loc)
  y_slice <- dplyr_new_list(y_slice)

  x_patched <- map2(x_slice, y_slice, coalesce)
  x_patched <-
new_data_frame(x_patched, n = length(x_loc)) x_values <- vec_assign(x_values, x_loc, x_patched) x_values <- dplyr_new_list(x_values) x <- dplyr_col_modify(x, x_values) x } #' @rdname rows #' @export rows_upsert <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { UseMethod("rows_upsert", x) } #' @export rows_upsert.data.frame <- function(x, y, by = NULL, ..., copy = FALSE, in_place = FALSE) { check_dots_empty() rows_df_in_place(in_place) y <- auto_copy(x, y, copy = copy) by <- rows_check_by(by, y) rows_check_x_contains_y(x, y) rows_check_contains_by(x, by, "x") rows_check_contains_by(y, by, "y") x_key <- dplyr_col_select(x, by) y_key <- dplyr_col_select(y, by) rows_check_unique(y_key, "y") args <- vec_cast_common(x = x_key, y = y_key) x_key <- args$x y_key <- args$y values_names <- setdiff(names(y), names(y_key)) x_values <- dplyr_col_select(x, values_names) y_values <- dplyr_col_select(y, values_names) y_values <- rows_cast_y(y_values, x_values) loc <- vec_match(x_key, y_key) match <- !is.na(loc) y_loc <- loc[match] x_loc <- which(match) # Update y_values <- dplyr_row_slice(y_values, y_loc) x_values <- vec_assign(x_values, x_loc, y_values) x_values <- dplyr_new_list(x_values) x <- dplyr_col_modify(x, x_values) # Insert y_size <- vec_size(y_key) y_extra <- vec_as_location_invert(y_loc, y_size) y <- dplyr_row_slice(y, y_extra) y <- rows_cast_y(y, x) x <- rows_bind(x, y) x } #' @rdname rows #' @export rows_delete <- function(x, y, by = NULL, ..., unmatched = c("error", "ignore"), copy = FALSE, in_place = FALSE) { UseMethod("rows_delete", x) } #' @export rows_delete.data.frame <- function(x, y, by = NULL, ..., unmatched = c("error", "ignore"), copy = FALSE, in_place = FALSE) { check_dots_empty() rows_df_in_place(in_place) y <- auto_copy(x, y, copy = copy) by <- rows_check_by(by, y) rows_check_contains_by(x, by, "x") rows_check_contains_by(y, by, "y") x_key <- dplyr_col_select(x, by) y_key <- dplyr_col_select(y, by) args <- vec_cast_common(x = x_key, y = 
y_key)
  x_key <- args$x
  y_key <- args$y

  keep <- rows_check_y_unmatched(x_key, y_key, unmatched)

  if (!is.null(keep)) {
    y_key <- dplyr_row_slice(y_key, keep)
  }

  # Non-key columns of `y` play no role in a delete; tell the user.
  extra <- setdiff(names(y), names(y_key))
  if (!is_empty(extra)) {
    message <- glue("Ignoring extra `y` columns: ", commas(tick_if_needed(extra)))
    inform(message, class = c("dplyr_message_delete_extra_cols", "dplyr_message"))
  }

  # Keep exactly the `x` rows whose key does NOT appear in `y`.
  loc <- vec_match(x_key, y_key)
  unmatched <- is.na(loc)
  x_loc <- which(unmatched)

  dplyr_row_slice(x, x_loc)
}

# helpers -----------------------------------------------------------------

# Validate `by` and default it to the first column of `y` (with a message).
# `by` must end up as a non-empty, unnamed character vector.
rows_check_by <- function(by, y, ..., error_call = caller_env()) {
  check_dots_empty()

  if (is.null(by)) {
    if (ncol(y) == 0L) {
      abort("`y` must have at least one column.", call = error_call)
    }

    by <- names(y)[[1]]

    inform(
      message = glue("Matching, by = \"{by}\""),
      class = c("dplyr_message_matching_by", "dplyr_message")
    )
  }

  if (!is.character(by)) {
    abort("`by` must be a character vector.", call = error_call)
  }
  if (is_empty(by)) {
    abort("`by` must specify at least 1 column.", call = error_call)
  }
  if (!all(names2(by) == "")) {
    abort("`by` must be unnamed.", call = error_call)
  }

  by
}

# Every column of `y` must also exist in `x` (the rows_* verbs write `y`
# values into `x` columns).
rows_check_x_contains_y <- function(x, y, ..., error_call = caller_env()) {
  check_dots_empty()

  bad <- setdiff(names(y), names(x))

  if (!is_empty(bad)) {
    bad <- err_vars(bad)
    message <- c(
      "All columns in `y` must exist in `x`.",
      i = glue("The following columns only exist in `y`: {bad}.")
    )
    abort(message, call = error_call)
  }

  invisible()
}

# Cast the columns of `y` to the types of the matching columns of `x`.
rows_cast_y <- function(y, x, ..., call = caller_env()) {
  vec_cast(x = y, to = x, x_arg = "y", to_arg = "x", call = call)
}

# Every name in `by` must be a column of the data frame `x`; `arg` is the
# user-facing name ("x" or "y") used in the error message.
rows_check_contains_by <- function(x, by, arg, ..., error_call = caller_env()) {
  check_dots_empty()

  missing <- setdiff(by, names(x))

  if (is_empty(missing)) {
    return(invisible())
  }

  missing <- err_vars(missing)
  message <- c(
    "All columns specified through `by` must exist in `x` and `y`.",
    i = glue("The following columns are missing from `{arg}`: {missing}.")
  )
  abort(message, call = error_call)
}

# Error unless the key rows of `x` are all distinct.
rows_check_unique <- function(x, arg, ..., error_call = caller_env()) {
  check_dots_empty()

  if (!vec_duplicate_any(x)) {
    return(invisible())
  }

  duplicated <- vec_duplicate_detect(x)
  duplicated <- which(duplicated)
  duplicated <- err_locs(duplicated)

  message <- c(
    glue("`{arg}` key values must be unique."),
    i = glue("The following rows contain duplicate key values: {duplicated}.")
  )
  abort(message, call = error_call)
}

# For rows_insert(): handle `y` keys that already exist in `x`. Returns NULL
# when there is nothing to drop, or the locations of the `y` rows to keep
# when `conflict = "ignore"`.
rows_check_y_conflict <- function(x_key, y_key, conflict, ..., error_call = caller_env()) {
  check_dots_empty()

  conflict <- rows_check_conflict(conflict, error_call = error_call)

  keep <- NULL

  rows_matched <- vec_in(y_key, x_key)

  if (any(rows_matched)) {
    if (conflict == "error") {
      rows_matched <- which(rows_matched)
      rows_matched <- err_locs(rows_matched)

      message <- c(
        "`y` can't contain keys that already exist in `x`.",
        i = glue("The following rows in `y` have keys that already exist in `x`: {rows_matched}."),
        i = "Use `conflict = \"ignore\"` if you want to ignore these `y` rows."
      )

      abort(message, call = error_call)
    } else if (conflict == "ignore") {
      keep <- which(!rows_matched)
    } else {
      abort("Unknown `conflict` value.", .internal = TRUE)
    }
  }

  keep
}

# For rows_update()/rows_patch()/rows_delete(): handle `y` keys that do NOT
# exist in `x`. Returns NULL when every `y` key matches, or the locations of
# the matched `y` rows when `unmatched = "ignore"`.
rows_check_y_unmatched <- function(x_key, y_key, unmatched, ..., error_call = caller_env()) {
  check_dots_empty()

  unmatched <- rows_check_unmatched(unmatched, error_call = error_call)

  keep <- NULL

  rows_unmatched <- !vec_in(y_key, x_key)

  if (any(rows_unmatched)) {
    if (unmatched == "error") {
      rows_unmatched <- which(rows_unmatched)
      rows_unmatched <- err_locs(rows_unmatched)

      message <- c(
        "`y` must contain keys that already exist in `x`.",
        i = glue("The following rows in `y` have keys that don't exist in `x`: {rows_unmatched}."),
        i = "Use `unmatched = \"ignore\"` if you want to ignore these `y` rows."
      )

      abort(message, call = error_call)
    } else if (unmatched == "ignore") {
      keep <- which(!rows_unmatched)
    } else {
      abort("Unknown `unmatched` value.", .internal = TRUE)
    }
  }

  keep
}

# Validate the `conflict` argument against its two allowed values.
rows_check_conflict <- function(conflict, ..., error_call = caller_env()) {
  check_dots_empty0(...)

  arg_match0(
    arg = conflict,
    values = c("error", "ignore"),
    error_call = error_call
  )
}

# Validate the `unmatched` argument against its two allowed values.
rows_check_unmatched <- function(unmatched, ..., error_call = caller_env()) {
  check_dots_empty0(...)

  arg_match0(
    arg = unmatched,
    values = c("error", "ignore"),
    error_call = error_call
  )
}

# Data frames have no backing store, so `in_place = TRUE` is always an error.
rows_df_in_place <- function(in_place, error_call = caller_env()) {
  if (is_true(in_place)) {
    msg <- "Data frames only support `in_place = FALSE`."
    abort(msg, call = error_call)
  }
}

# Row-bind `y` under `x`, restoring the attributes/class of `x`.
rows_bind <- function(x, y) {
  dplyr_reconstruct(vctrs::vec_rbind(x, y), x)
}

# Invert a location vector: the positions of `seq_len(n)` NOT in `i`.
vec_as_location_invert <- function(i, n) {
  if (is_empty(i)) {
    seq_len(n)
  } else {
    vec_as_location(-i, n)
  }
}
dplyr/R/colwise.R0000644000176200001440000002443314406402754013411 0ustar liggesusers#' Operate on a selection of variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [pick()] or [across()] in an existing verb. See `vignette("colwise")` for
#' details.
#'
#' The variants suffixed with `_if`, `_at` or `_all` apply an
#' expression (sometimes several) to all variables within a specified
#' subset. This subset can contain all variables (`_all` variants), a
#' [vars()] selection (`_at` variants), or variables selected with a
#' predicate (`_if` variants).
#'
#' The verbs with scoped variants are:
#'
#' * [mutate()], [transmute()] and [summarise()]. See [summarise_all()].
#' * [filter()]. See [filter_all()].
#' * [group_by()]. See [group_by_all()].
#' * [rename()] and [select()]. See [select_all()].
#' * [arrange()]. See [arrange_all()]
#'
#' There are three kinds of scoped variants. They differ in the scope
#' of the variable selection on which operations are applied:
#'
#' * Verbs suffixed with `_all()` apply an operation on all variables.
#'
#' * Verbs suffixed with `_at()` apply an operation on a subset of
#'   variables specified with the quoting function [vars()]. This
#'   quoting function accepts [tidyselect::vars_select()] helpers like
#'   [starts_with()]. Instead of a [vars()] selection, you can also
#'   supply an [integerish][rlang::is_integerish] vector of column
#'   positions or a character vector of column names.
#'
#' * Verbs suffixed with `_if()` apply an operation on the subset of
#'   variables for which a predicate function returns `TRUE`. Instead
#'   of a predicate function, you can also supply a logical vector.
#'
#' @param .tbl A `tbl` object.
#' @param .funs A function `fun`, a quosure style lambda `~ fun(.)` or a list of either form.
#'
#' @param .vars A list of columns generated by [vars()],
#'   a character vector of column names, a numeric vector of column
#'   positions, or `NULL`.
#' @param .predicate A predicate function to be applied to the columns
#'   or a logical vector. The variables for which `.predicate` is or
#'   returns `TRUE` are selected. This argument is passed to
#'   [rlang::as_function()] and thus supports quosure-style lambda
#'   functions and strings representing function names.
#' @param ... Additional arguments for the function calls in
#'   `.funs`. These are evaluated only once, with [tidy
#'   dots][rlang::tidy-dots] support.
#'
#' @section Grouping variables:
#'
#' Most of these operations also apply on the grouping variables when
#' they are part of the selection. This includes:
#'
#' * [arrange_all()], [arrange_at()], and [arrange_if()]
#' * [distinct_all()], [distinct_at()], and [distinct_if()]
#' * [filter_all()], [filter_at()], and [filter_if()]
#' * [group_by_all()], [group_by_at()], and [group_by_if()]
#' * [select_all()], [select_at()], and [select_if()]
#'
#' This is not the case for summarising and mutating variants where
#' operations are *not* applied on grouping variables. The behaviour
#' depends on whether the selection is **implicit** (`all` and `if`
#' selections) or **explicit** (`at` selections). Grouping variables
#' covered by explicit selections (with [summarise_at()],
#' [mutate_at()], and [transmute_at()]) are always an error. For
#' implicit selections, the grouping variables are always ignored. In
#' this case, the level of verbosity depends on the kind of operation:
#'
#' * Summarising operations ([summarise_all()] and [summarise_if()])
#'   ignore grouping variables silently because it is obvious that
#'   operations are not applied on grouping variables.
#'
#' * On the other hand it isn't as obvious in the case of mutating
#'   operations ([mutate_all()], [mutate_if()], [transmute_all()], and
#'   [transmute_if()]). For this reason, they issue a message
#'   indicating which grouping variables are ignored.
#'
#' @name scoped
NULL

#' Select variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' `vars()` is superseded because it is only needed for the scoped verbs (i.e.
#' [mutate_at()], [summarise_at()], and friends), which have been
#' superseded in favour of [across()]. See `vignette("colwise")` for details.
#'
#' This helper is intended to provide tidy-select semantics for scoped verbs
#' like `mutate_at()` and `summarise_at()`. Note that anywhere you can supply
#' `vars()` specification, you can also supply a numeric vector of column
#' positions or a character vector of column names.
#'
#' @param ... <[`tidy-select`][dplyr_tidy_select]> Variables to operate on.
#' @seealso [all_vars()] and [any_vars()] for other quoting
#'   functions that you can use with scoped verbs.
#' @export
vars <- function(...) {
  quos(...)
}

#' Apply predicate to all variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' `all_vars()` and `any_vars()` were only needed for the scoped verbs, which
#' have been superseded by the use of [across()] in an existing verb. See
#' `vignette("colwise")` for details.
#'
#' These quoting functions signal to scoped filtering verbs
#' (e.g. [filter_if()] or [filter_all()]) that a predicate expression
#' should be applied to all relevant variables. The `all_vars()`
#' variant takes the intersection of the predicate expressions with
#' `&` while the `any_vars()` variant takes the union with `|`.
#'
#' @param expr <[`data-masking`][rlang::args_data_masking]> An expression that
#'   returns a logical vector, using `.` to refer to the "current" variable.
#' @seealso [vars()] for other quoting functions that you
#'   can use with scoped verbs.
#' @export
all_vars <- function(expr) {
  lifecycle::signal_stage("superseded", "all_vars()")
  structure(enquo(expr), class = c("all_vars", "quosure", "formula"))
}

#' @rdname all_vars
#' @export
any_vars <- function(expr) {
  lifecycle::signal_stage("superseded", "any_vars()")
  structure(enquo(expr), class = c("any_vars", "quosure", "formula"))
}

#' @export
print.all_vars <- function(x, ...) {
  # FIX: print a header identifying the predicate kind; the bare `cat("\n")`
  # left all_vars/any_vars objects indistinguishable when printed.
  cat("<predicate intersection>\n")
  NextMethod()
}

#' @export
print.any_vars <- function(x, ...) {
  # FIX: see print.all_vars() above.
  cat("<predicate union>\n")
  NextMethod()
}

# Resolve a `vars()`/character/integerish `.vars` selection against the
# columns of `tbl`, returning a (possibly named) character vector of
# column names. Requires tbl_vars() method.
tbl_at_vars <- function(tbl, vars, .include_group_vars = FALSE, error_call = caller_env()) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(tbl)
  }

  if (is_null(vars)) {
    character()
  } else if (is_integerish(vars)) {
    # Positional selection.
    tibble_vars[vars]
  } else if (is_quosures(vars) || is_character(vars)) {
    out <- tidyselect::vars_select(tibble_vars, !!!vars)
    # Only keep names when the user explicitly supplied some.
    if (!any(have_name(vars))) {
      names(out) <- NULL
    }
    out
  } else {
    msg <- glue("`.vars` must be a character/numeric vector or a `vars()` object, not {obj_type_friendly(vars)}.")
    abort(msg, call = error_call)
  }
}

# Same as tbl_at_vars(), but returns symbols instead of strings.
tbl_at_syms <- function(tbl, vars, .include_group_vars = FALSE, error_call = caller_env()) {
  vars <- tbl_at_vars(tbl, vars, .include_group_vars = .include_group_vars, error_call = error_call)
  set_names(syms(vars), names(vars))
}

# Select the columns of `.tbl` for which the predicate `.p` holds.
# `.p` may be a logical vector (one entry per column), a function, a
# quosure-style lambda, or a single-element fun_list/list thereof.
# Returns the selected column names (or symbols for the logical shortcut).
# Requires tbl_vars(), `[[`() and length() methods.
tbl_if_vars <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE, error_call = caller_env()) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(.tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(.tbl)
  }

  if (is_logical(.p)) {
    if (length(.p) != length(tibble_vars)) {
      bullets <- c(
        "`.p` is invalid.",
        x = "`.p` should have the same size as the number of variables in the tibble.",
        i = glue("`.p` is size {length(.p)}."),
        i = glue("The tibble has {length(tibble_vars)} columns, {including} the grouping variables.",
          including = if (.include_group_vars) "including" else "non including")
      )
      abort(bullets, call = error_call)
    }
    return(syms(tibble_vars[.p]))
  }

  # Work on a prototype so lazy sources only materialise a small sample.
  .tbl <- tbl_ptype(.tbl)

  if (is_fun_list(.p) || is_list(.p)) {
    if (length(.p) != 1) {
      msg <- glue("`.predicate` must have length 1, not {length(.p)}.")
      abort(msg, call = error_call)
    }
    .p <- .p[[1]]
  }

  if (is_quosure(.p)) {
    .p <- quo_as_function(.p)
  } else {
    .p <- as_function(.p, .env)
  }

  n <- length(tibble_vars)
  selected <- new_logical(n)
  for (i in seq_len(n)) {
    # FIX: index with the local loop variable `i`, not `.env$i` — `i` is not
    # a binding in the caller environment `.env`, so `.env$i` either failed
    # or picked up an unrelated value (consistent with the `tibble_vars[[i]]`
    # uses in the error bullets below).
    column <- pull(.tbl, tibble_vars[[i]])
    cond <- eval_tidy(.p(column, ...))
    if (!is.logical(cond) || length(cond) != 1) {
      bullets <- c(
        "`.p` is invalid.",
        x = "`.p` should return a single logical.",
        i = if (is.logical(cond)) {
          glue("`.p` returns a size {length(cond)} for column `{tibble_vars[[i]]}`.")
        } else {
          glue("`.p` returns a <{vec_ptype_full(cond)}> for column `{tibble_vars[[i]]}`.")
        }
      )
      abort(bullets, call = error_call)
    }
    selected[[i]] <- isTRUE(cond)
  }
  tibble_vars[selected]
}

# Same as tbl_if_vars(), but returns symbols instead of strings.
tbl_if_syms <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE, error_call = caller_env()) {
  syms(tbl_if_vars(.tbl, .p, .env, ..., .include_group_vars = .include_group_vars, error_call = error_call))
}

#' Return a prototype of a tbl
#'
#' Used in `_if` functions to enable type-based selection even when the data
#' is lazily generated. Should either return the complete tibble, or if that
#' can not be computed quickly, a 0-row tibble where the columns are of
#' the correct type.
#'
#' @export
#' @keywords internal
tbl_ptype <- function(.data) {
  UseMethod("tbl_ptype")
}

#' @export
tbl_ptype.default <- function(.data) {
  if (inherits(.data, "tbl_lazy")) {
    # TODO: remove once moved to dplyr
    inform("Applying predicate on the first 100 rows")
    collect(.data, n = 100)
  } else {
    .data
  }
}

# The lambda must inherit from:
# - Execution environment (bound arguments with purrr lambda syntax)
# - Lexical environment (local variables)
# - Data mask (other columns)
#
# So we need:
# - Inheritance from closure -> lexical
# - A maskable quosure
as_inlined_function <- function(f, env, ...) {
  # Process unquote operator at inlining time
  f <- expr_interp(f)

  # Transform to a purrr-like lambda
  fn <- as_function(f, env = env)

  body(fn) <- expr({
    # Force all arguments
    base::pairlist(...)
# Transform the lambda body into a maskable quosure inheriting
# from the execution environment
`_quo` <- rlang::quo(!!body(fn))

# Evaluate the quosure in the mask
rlang::eval_bare(`_quo`, base::parent.frame())
})

structure(fn, class = "inline_colwise_function", formula = f)
}
dplyr/R/utils-format.R0000644000176200001440000000136214366556340014374 0ustar liggesusers#' Describing dimensions
#'
#' Prints the dimensions of an array-like object in a user-friendly manner,
#' substituting `NA` with ?? (for SQL queries).
#'
#' @param x Object to show dimensions for.
#' @export
#' @keywords internal
#' @examples
#' dim_desc(mtcars)
dim_desc <- function(x) {
  d <- dim(x)
  d2 <- big_mark(d)
  # Unknown extents (e.g. lazy SQL sources) print as "??".
  d2[is.na(d)] <- "??"

  paste0("[", paste0(d2, collapse = " x "), "]")
}

# function for the thousand separator,
# returns "," unless it's used for the decimal point, in which case returns "."
big_mark <- function(x, ...) {
  mark <- if (identical(getOption("OutDec"), ",")) "." else ","
  formatC(x, big.mark = mark, ...)
}

# Horizontal rule spanning the console width minus `gap` characters.
rule <- function(pad = "-", gap = 2L) {
  paste0(rep(pad, getOption("width") - gap), collapse = "")
}
dplyr/R/join-rows.R0000644000176200001440000003601414525503021013660 0ustar liggesusers# Compute the matching row locations for a join between `x_key` and `y_key`.
# Validates/normalises the join options, then delegates the actual matching
# to dplyr_locate_matches(). Returns a list with `x` and `y` location vectors.
join_rows <- function(x_key,
                      y_key,
                      ...,
                      type = c("inner", "left", "right", "full", "semi", "anti", "nest"),
                      na_matches = "na",
                      condition = "==",
                      filter = "none",
                      cross = FALSE,
                      multiple = "all",
                      unmatched = "drop",
                      relationship = NULL,
                      error_call = caller_env(),
                      user_env = caller_env()) {
  check_dots_empty0(...)

  type <- arg_match0(
    arg = type,
    values = c("inner", "left", "right", "full", "semi", "anti", "nest"),
    error_call = error_call
  )

  # `unmatched` may be specified per side; split into x/y components.
  unmatched <- check_unmatched(unmatched, type, error_call = error_call)
  x_unmatched <- unmatched$x
  y_unmatched <- unmatched$y

  # TODO: Remove this when `multiple = NULL / "error" / "warning"` is defunct
  if (is_null(multiple)) {
    warn_join_multiple_null(user_env = user_env)
    multiple <- "all"
  } else if (is_string(multiple, "error")) {
    warn_join_multiple("error", user_env = user_env)
  } else if (is_string(multiple, "warning")) {
    warn_join_multiple("warning", user_env = user_env)
  }

  if (cross) {
    # TODO: Remove this section when `by = character()` is defunct
    # Rather than matching on key values, match on a proxy where every x value
    # matches every y value. This purposefully does not propagate missings, as
    # missing values aren't considered in a cross-join.
    x_key <- vec_rep(1L, times = vec_size(x_key))
    y_key <- vec_rep(1L, times = vec_size(y_key))
    condition <- "=="
    filter <- "none"
  }

  if (is_null(relationship)) {
    relationship <- compute_join_relationship(type, condition, cross, user_env = user_env)
  } else {
    relationship <- check_join_relationship(relationship, error_call = error_call)
  }

  # Translate the join type + unmatched handling into vctrs-style options.
  incomplete <- standardise_join_incomplete(type, na_matches, x_unmatched)
  no_match <- standardise_join_no_match(type, x_unmatched)
  remaining <- standardise_join_remaining(type, y_unmatched)

  matches <- dplyr_locate_matches(
    needles = x_key,
    haystack = y_key,
    condition = condition,
    filter = filter,
    incomplete = incomplete,
    no_match = no_match,
    remaining = remaining,
    multiple = multiple,
    relationship = relationship,
    needles_arg = "x",
    haystack_arg = "y",
    error_call = error_call
  )

  list(x = matches$needles, y = matches$haystack)
}

# Thin wrapper over the matching engine; definition continues past this chunk.
dplyr_locate_matches <- function(needles,
                                 haystack,
                                 ...,
                                 condition = "==",
                                 filter = "none",
                                 incomplete = "compare",
                                 no_match = NA_integer_,
                                 remaining = "drop",
                                 multiple = "all",
                                 relationship = "none",
                                 needles_arg = "",
                                 haystack_arg = "",
                                 error_call
= caller_env()) { check_dots_empty0(...) withCallingHandlers( vctrs::vec_locate_matches( needles = needles, haystack = haystack, condition = condition, filter = filter, incomplete = incomplete, no_match = no_match, remaining = remaining, multiple = multiple, relationship = relationship, needles_arg = needles_arg, haystack_arg = haystack_arg, nan_distinct = TRUE ), vctrs_error_incompatible_type = function(cnd) { abort("`join_cast_common()` should have handled this.", .internal = TRUE) }, vctrs_error_matches_overflow = function(cnd) { rethrow_error_join_matches_overflow(cnd, error_call) }, vctrs_error_matches_nothing = function(cnd) { rethrow_error_join_matches_nothing(cnd, error_call) }, vctrs_error_matches_incomplete = function(cnd) { rethrow_error_join_matches_incomplete(cnd, error_call) }, vctrs_error_matches_remaining = function(cnd) { rethrow_error_join_matches_remaining(cnd, error_call) }, vctrs_error_matches_relationship_one_to_one = function(cnd) { rethrow_error_join_relationship_one_to_one(cnd, error_call) }, vctrs_error_matches_relationship_one_to_many = function(cnd) { rethrow_error_join_relationship_one_to_many(cnd, error_call) }, vctrs_error_matches_relationship_many_to_one = function(cnd) { rethrow_error_join_relationship_many_to_one(cnd, error_call) }, vctrs_warning_matches_relationship_many_to_many = function(cnd) { rethrow_warning_join_relationship_many_to_many(cnd, error_call) }, vctrs_error_matches_multiple = function(cnd) { rethrow_error_join_matches_multiple(cnd, error_call) }, vctrs_warning_matches_multiple = function(cnd) { rethrow_warning_join_matches_multiple(cnd, error_call) } ) } rethrow_error_join_matches_overflow <- function(cnd, call) { size <- cnd$size stop_join( message = c( "This join would result in more rows than dplyr can handle.", i = glue( "{size} rows would be returned. ", "2147483647 rows is the maximum number allowed." ), i = paste0( "Double check your join keys. 
This error commonly occurs due to a ", "missing join key, or an improperly specified join condition." ) ), class = "dplyr_error_join_matches_overflow", call = call ) } rethrow_error_join_matches_nothing <- function(cnd, call) { i <- cnd$i stop_join( message = c( "Each row of `x` must have a match in `y`.", i = glue("Row {i} of `x` does not have a match.") ), class = "dplyr_error_join_matches_nothing", call = call ) } rethrow_error_join_matches_incomplete <- function(cnd, call) { # Only occurs with `na_matches = "never", unmatched = "error"` for # right and inner joins, and is a signal that `x` has unmatched incompletes # that would result in dropped rows. So really this is a matched-nothing case. rethrow_error_join_matches_nothing(cnd, call) } rethrow_error_join_matches_remaining <- function(cnd, call) { i <- cnd$i stop_join( message = c( "Each row of `y` must be matched by `x`.", i = glue("Row {i} of `y` was not matched.") ), class = "dplyr_error_join_matches_remaining", call = call ) } rethrow_error_join_relationship_one_to_one <- function(cnd, call) { i <- cnd$i which <- cnd$which if (which == "needles") { x_name <- "x" y_name <- "y" } else { x_name <- "y" y_name <- "x" } stop_join_matches_multiple( i = i, x_name = x_name, y_name = y_name, class = "dplyr_error_join_relationship_one_to_one", call = call ) } rethrow_error_join_relationship_one_to_many <- function(cnd, call) { stop_join_matches_multiple( i = cnd$i, x_name = "y", y_name = "x", class = "dplyr_error_join_relationship_one_to_many", call = call ) } rethrow_error_join_relationship_many_to_one <- function(cnd, call) { stop_join_matches_multiple( i = cnd$i, x_name = "x", y_name = "y", class = "dplyr_error_join_relationship_many_to_one", call = call ) } rethrow_warning_join_relationship_many_to_many <- function(cnd, call) { i <- cnd$i j <- cnd$j warn_join( message = c( "Detected an unexpected many-to-many relationship between `x` and `y`.", i = glue("Row {i} of `x` matches multiple rows in `y`."), i = 
glue("Row {j} of `y` matches multiple rows in `x`."), i = paste0( "If a many-to-many relationship is expected, ", "set `relationship = \"many-to-many\"` to silence this warning." ) ), class = "dplyr_warning_join_relationship_many_to_many", call = call ) # Cancel `cnd` maybe_restart("muffleWarning") } rethrow_error_join_matches_multiple <- function(cnd, call) { stop_join_matches_multiple( i = cnd$i, x_name = "x", y_name = "y", class = "dplyr_error_join_matches_multiple", call = call ) } rethrow_warning_join_matches_multiple <- function(cnd, call) { i <- cnd$i warn_join( message = c( glue("Each row in `x` is expected to match at most 1 row in `y`."), i = glue("Row {i} of `x` matches multiple rows.") ), class = "dplyr_warning_join_matches_multiple", call = call ) # Cancel `cnd` maybe_restart("muffleWarning") } stop_join_matches_multiple <- function(i, x_name, y_name, class, call) { stop_join( message = c( glue("Each row in `{x_name}` must match at most 1 row in `{y_name}`."), i = glue("Row {i} of `{x_name}` matches multiple rows in `{y_name}`.") ), class = class, call = call ) } stop_join <- function(message = NULL, class = NULL, ..., call = caller_env()) { stop_dplyr(message = message, class = c(class, "dplyr_error_join"), ..., call = call) } warn_join <- function(message = NULL, class = NULL, ...) { warn_dplyr(message = message, class = c(class, "dplyr_warning_join"), ...) } stop_dplyr <- function(message = NULL, class = NULL, ..., call = caller_env()) { abort(message = message, class = c(class, "dplyr_error"), ..., call = call) } warn_dplyr <- function(message = NULL, class = NULL, ...) { warn(message = message, class = c(class, "dplyr_warning"), ...) 
} check_unmatched <- function(unmatched, type, error_call = caller_env()) { # Inner joins check both `x` and `y` for unmatched keys, so `unmatched` is # allowed to be a character vector of size 2 in that case to check `x` and `y` # independently inner <- type == "inner" n_unmatched <- length(unmatched) if (n_unmatched == 1L || (n_unmatched == 2L && inner)) { arg_match( arg = unmatched, values = c("drop", "error"), multiple = TRUE, error_arg = "unmatched", error_call = error_call ) } else if (inner) { cli::cli_abort( "{.arg unmatched} must be length 1 or 2, not {n_unmatched}.", call = error_call ) } else { cli::cli_abort( "{.arg unmatched} must be length 1, not {n_unmatched}.", call = error_call ) } if (n_unmatched == 1L) { list(x = unmatched, y = unmatched) } else { list(x = unmatched[[1L]], y = unmatched[[2L]]) } } standardise_join_incomplete <- function(type, na_matches, x_unmatched) { if (na_matches == "na") { # Comparing missings in incomplete observations overrides the other arguments "compare" } else if (x_unmatched == "error" && (type == "right" || type == "inner")) { # Ensure that `x` can't drop rows when `na_matches = "never"` "error" } else if (type == "inner" || type == "right" || type == "semi") { # With these joins and `na_matches = "never"`, drop missings from `x` "drop" } else if (type == "nest") { # Nest join is special and returns `0` which will be sliced out later 0L } else { # Otherwise we are keeping all keys from `x` NA_integer_ } } standardise_join_no_match <- function(type, x_unmatched) { if (x_unmatched == "error" && (type == "right" || type == "inner")) { # Ensure that `x` can't drop rows "error" } else if (type == "inner" || type == "right" || type == "semi") { # With these joins, unmatched keys in `x` get dropped "drop" } else if (type == "nest") { # Nest join is special and returns `0` which will be sliced out later 0L } else { # Otherwise we are keeping all keys from `x` NA_integer_ } } standardise_join_remaining <- function(type, 
y_unmatched) { if (y_unmatched == "error" && (type == "left" || type == "inner" || type == "nest")) { # Ensure that `y` can't drop rows "error" } else if (type == "right" || type == "full") { # With these joins, unmatched keys in `y` are kept NA_integer_ } else { # Otherwise we drop unmatched keys in `y` "drop" } } compute_join_relationship <- function(type, condition, cross, user_env = caller_env(2)) { if (type == "nest") { # Not unreasonable to see a many-to-many relationship here, but it can't # result in a Cartesian explosion in the result so we don't check for it return("none") } if (type %in% c("semi", "anti")) { # Impossible to generate a many-to-many relationship here because we set # `multiple = "any"` return("none") } if (cross) { # TODO: Remove when `by = character()` is defunct # Cross-joins always result in many-to-many relationships return("none") } any_inequality <- any(condition != "==") if (any_inequality) { # We only check for a many-to-many relationship when doing an equality join, # because that is where it is typically unexpected. # - Inequality and overlap joins often generate many-to-many relationships # by nature # - Rolling joins are a little trickier, but we've decided that not warning # is probably easier to explain. `relationship = "many-to-one"` can always # be used explicitly as needed. 
return("none") } if (!is_direct(user_env)) { # Indirect calls don't warn, because the caller is unlikely to have access # to `relationship` to silence it return("none") } "warn-many-to-many" } check_join_relationship <- function(relationship, error_call = caller_env()) { arg_match0( arg = relationship, values = c("one-to-one", "one-to-many", "many-to-one", "many-to-many"), error_call = error_call ) } # ------------------------------------------------------------------------------ warn_join_multiple <- function(what, user_env = caller_env(2)) { what <- glue::glue('Specifying `multiple = "{what}"`') lifecycle::deprecate_warn( when = "1.1.1", what = I(what), with = I('`relationship = "many-to-one"`'), user_env = user_env, always = TRUE ) } warn_join_multiple_null <- function(user_env = caller_env(2)) { # Only really needed in case people wrapped `left_join()` and friends and # passed the old default of `NULL` through lifecycle::deprecate_warn( when = "1.1.1", what = I("Specifying `multiple = NULL`"), with = I('`multiple = "all"`'), user_env = user_env, always = TRUE ) } # ------------------------------------------------------------------------------ # TODO: Use upstream function when exported from rlang # `lifecycle:::is_direct()` is_direct <- function(env) { env_inherits_global(env) || from_testthat(env) } env_inherits_global <- function(env) { # `topenv(emptyenv())` returns the global env. Return `FALSE` in # that case to allow passing the empty env when the # soft-deprecation should not be promoted to deprecation based on # the caller environment. 
if (is_reference(env, empty_env())) { return(FALSE) } is_reference(topenv(env), global_env()) } # TRUE if we are in unit tests and the package being tested is the # same as the package that called from_testthat <- function(env) { tested_package <- Sys.getenv("TESTTHAT_PKG") if (!nzchar(tested_package)) { return(FALSE) } top <- topenv(env) if (!is_namespace(top)) { return(FALSE) } # Test for environment names rather than reference/contents because # testthat clones the namespace identical(ns_env_name(top), tested_package) } dplyr/R/bind-cols.R0000644000176200001440000000321514525503021013600 0ustar liggesusers #' Bind multiple data frames by column #' #' @description #' Bind any number of data frames by column, making a wider result. #' This is similar to `do.call(cbind, dfs)`. #' #' Where possible prefer using a [join][left_join] to combine multiple #' data frames. `bind_cols()` binds the rows in order in which they appear #' so it is easy to create meaningless results without realising it. #' #' @param ... Data frames to combine. Each argument can either be a data frame, #' a list that could be a data frame, or a list of data frames. #' Inputs are [recycled][vctrs::theory-faq-recycling] to the same length, #' then matched by position. #' @param .name_repair One of `"unique"`, `"universal"`, or #' `"check_unique"`. See [vctrs::vec_as_names()] for the meaning of these #' options. #' @returns A data frame the same type as the first element of `...`. #' @export #' @examples #' df1 <- tibble(x = 1:3) #' df2 <- tibble(y = 3:1) #' bind_cols(df1, df2) #' #' # Row sizes must be compatible when column-binding #' try(bind_cols(tibble(x = 1:3), tibble(y = 1:2))) bind_cols <- function(..., .name_repair = c("unique", "universal", "check_unique", "minimal")) { dots <- list2(...) 
dots <- list_flatten(dots, recursive = TRUE) dots <- discard(dots, is.null) # Strip names off of data frame components so that vec_cbind() unpacks them names2(dots)[map_lgl(dots, is.data.frame)] <- "" out <- vec_cbind(!!!dots, .name_repair = .name_repair, .error_call = current_env()) if (!any(map_lgl(dots, is.data.frame))) { out <- as_tibble(out) } if (length(dots) && is.data.frame(first <- dots[[1L]])) { out <- dplyr_reconstruct(out, first) } out } dplyr/R/n-distinct.R0000644000176200001440000000260314366556340014021 0ustar liggesusers#' Count unique combinations #' #' `n_distinct()` counts the number of unique/distinct combinations in a set #' of one or more vectors. It's a faster and more concise equivalent to #' `nrow(unique(data.frame(...)))`. #' #' @param ... Unnamed vectors. If multiple vectors are supplied, then they should #' have the same length. #' @param na.rm If `TRUE`, exclude missing observations from the count. #' If there are multiple vectors in `...`, an observation will #' be excluded if _any_ of the values are missing. #' @returns A single number. 
#' @export #' @examples #' x <- c(1, 1, 2, 2, 2) #' n_distinct(x) #' #' y <- c(3, 3, NA, 3, 3) #' n_distinct(y) #' n_distinct(y, na.rm = TRUE) #' #' # Pairs (1, 3), (2, 3), and (2, NA) are distinct #' n_distinct(x, y) #' #' # (2, NA) is dropped, leaving 2 distinct combinations #' n_distinct(x, y, na.rm = TRUE) #' #' # Also works with data frames #' n_distinct(data.frame(x, y)) n_distinct <- function(..., na.rm = FALSE) { if (missing(...)) { cli::cli_abort("{.arg ...} is absent, but must be supplied.") } check_dots_unnamed() data <- df_list( ..., .unpack = FALSE, .name_repair = "minimal", .error_call = current_env() ) data <- new_data_frame(data) if (isTRUE(na.rm)) { # Drop observation if *any* missing complete <- vec_detect_complete(data) data <- vec_slice(data, complete) } vec_unique_count(data) } dplyr/R/deprec-context.R0000644000176200001440000000162714366556340014676 0ustar liggesusers#' Information about the "current" group or variable #' #' @description #' `r lifecycle::badge("deprecated")` #' #' These functions were deprecated in dplyr 1.1.0. #' #' * `cur_data()` is deprecated in favor of [pick()]. #' * `cur_data_all()` is deprecated but does not have a direct replacement as #' selecting the grouping variables is not well-defined and is unlikely to #' ever be useful. 
#' #' @keywords internal #' @name deprec-context NULL #' @rdname deprec-context #' @export cur_data <- function() { lifecycle::deprecate_soft(when = "1.1.0", what = "cur_data()", with = "pick()") mask <- peek_mask() vars <- mask$current_non_group_vars() mask$pick_current(vars) } #' @rdname deprec-context #' @export cur_data_all <- function() { lifecycle::deprecate_soft(when = "1.1.0", what = "cur_data_all()", with = "pick()") mask <- peek_mask() vars <- mask$current_vars() mask$pick_current(vars) } dplyr/R/join-cross.R0000644000176200001440000000543314366556340014037 0ustar liggesusers#' Cross join #' #' @description #' Cross joins match each row in `x` to every row in `y`, resulting in a data #' frame with `nrow(x) * nrow(y)` rows. #' #' Since cross joins result in all possible matches between `x` and `y`, they #' technically serve as the basis for all [mutating joins][mutate-joins], which #' can generally be thought of as cross joins followed by a filter. In practice, #' a more specialized procedure is used for better performance. #' #' @inheritParams left_join #' #' @returns #' An object of the same type as `x` (including the same groups). The output has #' the following properties: #' #' - There are `nrow(x) * nrow(y)` rows returned. #' #' - Output columns include all columns from both `x` and `y`. Column name #' collisions are resolved using `suffix`. #' #' - The order of the rows and columns of `x` is preserved as much as possible. #' #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("cross_join")}. #' #' @family joins #' @export #' @examples #' # Cross joins match each row in `x` to every row in `y`. 
#' # Data within the columns is not used in the matching process. #' cross_join(band_instruments, band_members) #' #' # Control the suffix added to variables duplicated in #' # `x` and `y` with `suffix`. #' cross_join(band_instruments, band_members, suffix = c("", "_y")) cross_join <- function(x, y, ..., copy = FALSE, suffix = c(".x", ".y")) { UseMethod("cross_join") } #' @export cross_join.data.frame <- function(x, y, ..., copy = FALSE, suffix = c(".x", ".y")) { check_dots_empty0(...) y <- auto_copy(x, y, copy = copy) x_names <- tbl_vars(x) y_names <- tbl_vars(y) # Empty join by with no keys by <- new_join_by() # Particular value isn't too important, as there are no keys to keep/drop keep <- FALSE vars <- join_cols( x_names = x_names, y_names = y_names, by = by, suffix = suffix, keep = keep ) x_in <- as_tibble(x, .name_repair = "minimal") y_in <- as_tibble(y, .name_repair = "minimal") x_size <- vec_size(x_in) y_size <- vec_size(y_in) x_out <- set_names(x_in, names(vars$x$out)) y_out <- set_names(y_in, names(vars$y$out)) x_out <- vec_rep_each(x_out, times = y_size) y_out <- vec_rep(y_out, times = x_size) x_out[names(y_out)] <- y_out dplyr_reconstruct(x_out, x) } dplyr/R/reexport-pillar.R0000644000176200001440000000175114366556340015101 0ustar liggesusers#' Get a glimpse of your data #' #' @description #' `glimpse()` is like a transposed version of `print()`: #' columns run down the page, and data runs across. #' This makes it possible to see every column in a data frame. #' It's a little like [str()] applied to a data frame #' but it tries to show you as much data as possible. #' (And it always shows the underlying data, even when applied #' to a remote data source.) #' #' `glimpse()` is provided by the pillar package, and re-exported #' by dplyr. See [pillar::glimpse()] for more details. #' #' @return x original x is (invisibly) returned, allowing `glimpse()` to be #' used within a data pipeline. 
#' @examples #' glimpse(mtcars) #' #' # Note that original x is (invisibly) returned, allowing `glimpse()` to be #' # used within a pipeline. #' mtcars %>% #' glimpse() %>% #' select(1:3) #' #' glimpse(starwars) #' @importFrom pillar glimpse #' @export #' @name glimpse glimpse #' @importFrom pillar type_sum #' @export pillar::type_sum dplyr/R/summarise.R0000644000176200001440000003566414406402754013761 0ustar liggesusers#' Summarise each group down to one row #' #' @description #' `summarise()` creates a new data frame. It returns one row for each #' combination of grouping variables; if there are no grouping variables, the #' output will have a single row summarising all observations in the input. It #' will contain one column for each grouping variable and one column for each of #' the summary statistics that you have specified. #' #' `summarise()` and `summarize()` are synonyms. #' #' @section Useful functions: #' #' * Center: [mean()], [median()] #' * Spread: [sd()], [IQR()], [mad()] #' * Range: [min()], [max()], #' * Position: [first()], [last()], [nth()], #' * Count: [n()], [n_distinct()] #' * Logical: [any()], [all()] #' #' @section Backend variations: #' #' The data frame backend supports creating a variable and using it in the #' same summary. This means that previously created summary variables can be #' further transformed or combined within the summary, as in [mutate()]. #' However, it also means that summary variables with the same names as previous #' variables overwrite them, making those variables unavailable to later summary #' variables. #' #' This behaviour may not be supported in other backends. To avoid unexpected #' results, consider using new names for your summary variables, especially when #' creating multiple summaries. #' #' @export #' @inheritParams arrange #' @inheritParams args_by #' @param ... <[`data-masking`][rlang::args_data_masking]> Name-value pairs of #' summary functions. The name will be the name of the variable in the result. 
#' #' The value can be: #' * A vector of length 1, e.g. `min(x)`, `n()`, or `sum(is.na(y))`. #' * A data frame, to add multiple columns from a single expression. #' #' `r lifecycle::badge("deprecated")` Returning values with size 0 or >1 was #' deprecated as of 1.1.0. Please use [reframe()] for this instead. #' @param .groups `r lifecycle::badge("experimental")` Grouping structure of the #' result. #' #' * "drop_last": dropping the last level of grouping. This was the #' only supported option before version 1.0.0. #' * "drop": All levels of grouping are dropped. #' * "keep": Same grouping structure as `.data`. #' * "rowwise": Each row is its own group. #' #' When `.groups` is not specified, it is chosen #' based on the number of rows of the results: #' * If all the results have 1 row, you get "drop_last". #' * If the number of rows varies, you get "keep" (note that returning a #' variable number of rows was deprecated in favor of [reframe()], which #' also unconditionally drops all levels of grouping). #' #' In addition, a message informs you of that choice, unless the result is ungrouped, #' the option "dplyr.summarise.inform" is set to `FALSE`, #' or when `summarise()` is called from a function in a package. #' #' @family single table verbs #' @return #' An object _usually_ of the same type as `.data`. #' #' * The rows come from the underlying [group_keys()]. #' * The columns are a combination of the grouping keys and the summary #' expressions that you provide. #' * The grouping structure is controlled by the `.groups=` argument, the #' output may be another [grouped_df], a [tibble] or a [rowwise] data frame. #' * Data frame attributes are **not** preserved, because `summarise()` #' fundamentally creates a new data frame. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. 
#' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}. #' @examples #' # A summary applied to ungrouped tbl returns a single row #' mtcars %>% #' summarise(mean = mean(disp), n = n()) #' #' # Usually, you'll want to group first #' mtcars %>% #' group_by(cyl) %>% #' summarise(mean = mean(disp), n = n()) #' #' # Each summary call removes one grouping level (since that group #' # is now just a single row) #' mtcars %>% #' group_by(cyl, vs) %>% #' summarise(cyl_n = n()) %>% #' group_vars() #' #' # BEWARE: reusing variables may lead to unexpected results #' mtcars %>% #' group_by(cyl) %>% #' summarise(disp = mean(disp), sd = sd(disp)) #' #' # Refer to column names stored as strings with the `.data` pronoun: #' var <- "mass" #' summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE)) #' # Learn more in ?rlang::args_data_masking #' #' # In dplyr 1.1.0, returning multiple rows per group was deprecated in favor #' # of `reframe()`, which never messages and always returns an ungrouped #' # result: #' mtcars %>% #' group_by(cyl) %>% #' summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) #' # -> #' mtcars %>% #' group_by(cyl) %>% #' reframe(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75)) summarise <- function(.data, ..., .by = NULL, .groups = NULL) { by <- enquo(.by) if (!quo_is_null(by) && !is.null(.groups)) { abort("Can't supply both `.by` and `.groups`.") } UseMethod("summarise") } #' @rdname summarise #' @export summarize <- summarise #' @export summarise.data.frame <- function(.data, ..., .by = NULL, .groups = NULL) { by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data") cols <- summarise_cols(.data, dplyr_quosures(...), by, "summarise") out <- summarise_build(by, cols) if (!cols$all_one) { summarise_deprecate_variable_size() } if (!is_tibble(.data)) { # The `by` group data we build from is always a tibble, # so we have to manually downcast 
as needed out <- as.data.frame(out) } if (identical(.groups, "rowwise")) { out <- rowwise_df(out, character()) } out } #' @export summarise.grouped_df <- function(.data, ..., .by = NULL, .groups = NULL) { # Will always error if `.by != NULL` b/c you can't use it with grouped/rowwise dfs. by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data") cols <- summarise_cols(.data, dplyr_quosures(...), by, "summarise") out <- summarise_build(by, cols) verbose <- summarise_verbose(.groups, caller_env()) if (!cols$all_one) { summarise_deprecate_variable_size() } if (is.null(.groups)) { if (cols$all_one) { .groups <- "drop_last" } else { .groups <- "keep" } } group_vars <- by$names if (identical(.groups, "drop_last")) { n <- length(group_vars) if (n > 1) { if (verbose) { new_groups <- glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ") summarise_inform("has grouped output by {new_groups}") } out <- grouped_df(out, group_vars[-n], group_by_drop_default(.data)) } } else if (identical(.groups, "keep")) { if (verbose) { new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ") summarise_inform("has grouped output by {new_groups}") } out <- grouped_df(out, group_vars, group_by_drop_default(.data)) } else if (identical(.groups, "rowwise")) { out <- rowwise_df(out, group_vars) } else if(!identical(.groups, "drop")) { bullets <- c( paste0("`.groups` can't be ", as_label(.groups)), i = 'Possible values are NULL (default), "drop_last", "drop", "keep", and "rowwise"' ) abort(bullets) } out } #' @export summarise.rowwise_df <- function(.data, ..., .by = NULL, .groups = NULL) { # Will always error if `.by != NULL` b/c you can't use it with grouped/rowwise dfs. 
by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data") cols <- summarise_cols(.data, dplyr_quosures(...), by, "summarise") out <- summarise_build(by, cols) verbose <- summarise_verbose(.groups, caller_env()) if (!cols$all_one) { summarise_deprecate_variable_size() } group_vars <- by$names if (is.null(.groups) || identical(.groups, "keep")) { if (verbose && length(group_vars)) { new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ") summarise_inform("has grouped output by {new_groups}") } out <- grouped_df(out, group_vars) } else if (identical(.groups, "rowwise")) { out <- rowwise_df(out, group_vars) } else if (!identical(.groups, "drop")) { bullets <- c( paste0("`.groups` can't be ", as_label(.groups)), i = 'Possible values are NULL (default), "drop", "keep", and "rowwise"' ) abort(bullets) } out } summarise_cols <- function(data, dots, by, verb, error_call = caller_env()) { error_call <- dplyr_error_call(error_call) mask <- DataMask$new(data, by, verb, error_call = error_call) on.exit(mask$forget(), add = TRUE) old_current_column <- context_peek_bare("column") on.exit(context_poke("column", old_current_column), add = TRUE) warnings_state <- env(warnings = list()) cols <- list() sizes <- 1L chunks <- list() results <- list() types <- list() out_names <- character() local_error_context(dots, 0L, mask = mask) withCallingHandlers({ for (i in seq_along(dots)) { poke_error_context(dots, i, mask = mask) context_poke("column", old_current_column) dot <- dots[[i]] # - expand dot <- expand_pick(dot, mask) quosures <- expand_across(dot) # - compute quosures_results <- map(quosures, summarise_eval_one, mask = mask) # - structure for (k in seq_along(quosures)) { quo <- quosures[[k]] quo_data <- attr(quo, "dplyr:::data") quo_result <- quosures_results[[k]] if (is.null(quo_result)) { next } types_k <- quo_result$types chunks_k <- quo_result$chunks results_k <- quo_result$results if (!quo_data$is_named && is.data.frame(types_k)) { chunks_extracted 
<- .Call(dplyr_extract_chunks, chunks_k, types_k) types_k_names <- names(types_k) for (j in seq_along(chunks_extracted)) { mask$add_one( name = types_k_names[j], chunks = chunks_extracted[[j]], result = results_k[[j]] ) } chunks <- append(chunks, chunks_extracted) types <- append(types, as.list(types_k)) results <- append(results, results_k) out_names <- c(out_names, types_k_names) } else { name <- dplyr_quosure_name(quo_data) mask$add_one(name = name, chunks = chunks_k, result = results_k) chunks <- append(chunks, list(chunks_k)) types <- append(types, list(types_k)) results <- append(results, list(results_k)) out_names <- c(out_names, name) } } } # Recycle horizontally across sets of chunks. # Modifies `chunks` and `results` in place for efficiency! sizes <- .Call(`dplyr_summarise_recycle_chunks_in_place`, chunks, results) # Materialize columns, regenerate any `results` that were `NULL`ed # during the recycling process. for (i in seq_along(chunks)) { result <- results[[i]] %||% vec_c(!!!chunks[[i]], .ptype = types[[i]]) cols[[ out_names[i] ]] <- result } }, error = function(cnd) { if (inherits(cnd, "dplyr:::summarise_incompatible_size")) { action <- "recycle" i <- cnd$dplyr_error_data$index } else { action <- "compute" i <- i } handler <- dplyr_error_handler( dots = dots, mask = mask, bullets = summarise_bullets, error_call = error_call, action = action ) handler(cnd) }, warning = dplyr_warning_handler( state = warnings_state, mask = mask, error_call = error_call )) signal_warnings(warnings_state, error_call) list(new = cols, sizes = sizes, all_one = all(sizes == 1L)) } summarise_eval_one <- function(quo, mask) { quo_data <- attr(quo, "dplyr:::data") if (!is.null(quo_data$column)) { context_poke("column", quo_data$column) # wrap the error when this has been expanded chunks_k <- withCallingHandlers( mask$eval_all_summarise(quo), error = function(cnd) { name <- dplyr_quosure_name(quo_data) msg <- glue("Can't compute column `{name}`.") abort(msg, call = 
call("across"), parent = cnd) } ) } else { # no wrapping otherwise chunks_k <- mask$eval_all_summarise(quo) } if (is.null(chunks_k)) { return(NULL) } # `name` specified lazily types_k <- dplyr_vec_ptype_common( chunks = chunks_k, name = dplyr_quosure_name(quo_data) ) chunks_k <- vec_cast_common(!!!chunks_k, .to = types_k) result_k <- vec_c(!!!chunks_k, .ptype = types_k) list(chunks = chunks_k, types = types_k, results = result_k) } summarise_build <- function(by, cols) { out <- group_keys0(by$data) if (!cols$all_one) { out <- vec_rep_each(out, cols$sizes) } dplyr_col_modify(out, cols$new) } summarise_bullets <- function(cnd, ...) { UseMethod("summarise_bullets") } #' @export `summarise_bullets.dplyr:::summarise_unsupported_type` <- function(cnd, ...) { result <- cnd$dplyr_error_data$result error_name <- ctxt_error_label() c( glue("`{error_name}` must be a vector, not {obj_type_friendly(result)}."), i = cnd_bullet_rowwise_unlist() ) } #' @export `summarise_bullets.dplyr:::summarise_incompatible_size` <- function(cnd, ...) { expected_size <- cnd$dplyr_error_data$expected_size size <- cnd$dplyr_error_data$size group <- cnd$dplyr_error_data$group error_context <- peek_error_context() error_name <- ctxt_error_label(error_context) # FIXME: So that cnd_bullet_cur_group_label() correctly reports the # faulty group peek_mask()$set_current_group(group) c( glue("`{error_name}` must be size {or_1(expected_size)}, not {size}."), i = glue("An earlier column had size {expected_size}.") ) } #' @export `summarise_bullets.dplyr:::summarise_mixed_null` <- function(cnd, ...) { error_name <- ctxt_error_label() c( glue("`{error_name}` must return compatible vectors across groups."), x = "Can't combine NULL and non NULL results." 
  )
}

# messaging ---------------------------------------------------------------

# Should `summarise()` message about how the output is grouped?
# Precedence: an explicit `.groups` argument silences the message entirely;
# otherwise a logical "dplyr.summarise.inform" option wins; otherwise we only
# message when the caller's environment is the global environment (i.e. an
# interactive/top-level call rather than package code).
summarise_verbose <- function(.groups, .env) {
  if (!is.null(.groups)) {
    # User supplied `.groups`
    return(FALSE)
  }
  inform <- getOption("dplyr.summarise.inform")
  if (is_true(inform) || is_false(inform)) {
    # User supplied global option
    return(inform)
  }
  is_reference(topenv(.env), global_env())
}

# Emit the standard `summarise()` grouping message: glue-interpolates `...`
# in `.env` and appends the hint about overriding via `.groups`.
summarise_inform <- function(..., .env = parent.frame()) {
  inform(paste0(
    "`summarise()` ",
    glue(..., .envir = .env),
    '. You can override using the `.groups` argument.'
  ))
}

# Deprecation warning (dplyr 1.1.0, `always = TRUE` so it fires every time)
# for `summarise()` results that are not exactly one row per group,
# pointing users to `reframe()` instead.
summarise_deprecate_variable_size <- function(env = caller_env(), user_env = caller_env(2)) {
  lifecycle::deprecate_warn(
    when = "1.1.0",
    what = I("Returning more (or less) than 1 row per `summarise()` group"),
    with = "reframe()",
    details = paste0(
      "When switching from `summarise()` to `reframe()`, remember that ",
      "`reframe()` always returns an ungrouped data frame and adjust accordingly."
    ),
    env = env,
    user_env = user_env,
    always = TRUE
  )
}
dplyr/R/top-n.R0000644000176200001440000000412014266276767013011 0ustar liggesusers#' Select top (or bottom) n rows (by value)
#'
#' @description
#' `r lifecycle::badge("superseded")`
#' `top_n()` has been superseded in favour of [slice_min()]/[slice_max()].
#' While it will not be deprecated in the near future, retirement means
#' that we will only perform critical bug fixes, so we recommend moving to the
#' newer alternatives.
#'
#' `top_n()` was superseded because the name was fundamentally confusing as
#' it returned what you might reasonably consider to be the _bottom_
#' rows. Additionally, the `wt` variable had a confusing name, and strange
#' default (the last column in the data frame). Unfortunately we could not
#' see an easy way to fix the existing `top_n()` function without breaking
#' existing code, so we created a new alternative.
#'
#' @param x A data frame.
#' @param n Number of rows to return for `top_n()`, fraction of rows to
#'   return for `top_frac()`.
#' If `n` is positive, selects the top rows.
#'   If negative, selects the bottom rows.
#'   If `x` is grouped, this is the number (or fraction) of rows per group.
#'   Will include more rows if there are ties.
#' @param wt (Optional). The variable to use for ordering. If not
#'   specified, defaults to the last variable in the tbl.
#' @keywords internal
#' @export
#' @examples
#' df <- data.frame(x = c(6, 4, 1, 10, 3, 1, 1))
#'
#' df %>% top_n(2) # highest values
#' df %>% top_n(-2) # lowest values
#' # now use
#' df %>% slice_max(x, n = 2)
#' df %>% slice_min(x, n = 2)
#'
#' # top_frac() -> prop argument of slice_min()/slice_max()
#' df %>% top_frac(.5)
#' # ->
#' df %>% slice_max(x, prop = 0.5)
top_n <- function(x, n, wt) {
  lifecycle::signal_stage("superseded", "top_n()")

  wt <- enquo(wt)
  if (quo_is_missing(wt)) {
    # No ordering variable given: fall back to the last column and tell the
    # user which one was chosen.
    col_names <- tbl_vars(x)
    wt_name <- col_names[length(col_names)]
    inform(glue("Selecting by ", wt_name))
    wt <- sym(wt_name)
  }

  filter(x, top_n_rank({{ n }}, !!wt))
}

# Ranking predicate for top_n(): a positive `n` keeps the `n` largest values
# of `wt`, a non-positive `n` keeps the `abs(n)` smallest. min_rank() means
# ties are all kept, so more than `n` rows may be returned.
top_n_rank <- function(n, wt) {
  if (n <= 0) {
    min_rank(wt) <= abs(n)
  } else {
    min_rank(desc(wt)) <= n
  }
}

#' @export
#' @rdname top_n
top_frac <- function(x, n, wt) {
  # Fraction variant: scale the requested proportion by the (group) size.
  top_n(x, {{ n }} * n(), {{ wt }})
}
dplyr/R/colwise-mutate.R0000644000176200001440000003454414366556340014710 0ustar liggesusers#' Summarise multiple columns
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [pick()] or [across()] in an existing verb. See `vignette("colwise")` for
#' details.
#'
#' The [scoped] variants of [summarise()] make it easy to apply the same
#' transformation to multiple variables.
#' There are three variants.
#' * `summarise_all()` affects every variable #' * `summarise_at()` affects variables selected with a character vector or #' vars() #' * `summarise_if()` affects variables selected with a predicate function #' #' @inheritParams scoped #' @param .cols This argument has been renamed to `.vars` to fit #' dplyr's terminology and is deprecated. #' @return A data frame. By default, the newly created columns have the shortest #' names needed to uniquely identify the output. To force inclusion of a name, #' even when not needed, name the input (see examples for details). #' @seealso [The other scoped verbs][scoped], [vars()] #' #' @section Grouping variables: #' #' If applied on a grouped tibble, these operations are *not* applied #' to the grouping variables. The behaviour depends on whether the #' selection is **implicit** (`all` and `if` selections) or #' **explicit** (`at` selections). #' #' * Grouping variables covered by explicit selections in #' `summarise_at()` are always an error. Add `-group_cols()` to the #' [vars()] selection to avoid this: #' #' ``` #' data %>% #' summarise_at(vars(-group_cols(), ...), myoperation) #' ``` #' #' Or remove `group_vars()` from the character vector of column names: #' #' ``` #' nms <- setdiff(nms, group_vars(data)) #' data %>% summarise_at(nms, myoperation) #' ``` #' #' * Grouping variables covered by implicit selections are silently #' ignored by `summarise_all()` and `summarise_if()`. #' #' @section Naming: #' #' The names of the new columns are derived from the names of the #' input variables and the names of the functions. #' #' - if there is only one unnamed function (i.e. 
if `.funs` is an unnamed list #' of length one), #' the names of the input variables are used to name the new columns; #' #' - for `_at` functions, if there is only one unnamed variable (i.e., #' if `.vars` is of the form `vars(a_single_column)`) and `.funs` has length #' greater than one, #' the names of the functions are used to name the new columns; #' #' - otherwise, the new names are created by #' concatenating the names of the input variables and the names of the #' functions, separated with an underscore `"_"`. #' #' The `.funs` argument can be a named or unnamed list. #' If a function is unnamed and the name cannot be derived automatically, #' a name of the form "fn#" is used. #' Similarly, [vars()] accepts named and unnamed arguments. #' If a variable in `.vars` is named, a new column by that name will be created. #' #' Name collisions in the new columns are disambiguated using a unique suffix. #' #' @examples #' # The _at() variants directly support strings: #' starwars %>% #' summarise_at(c("height", "mass"), mean, na.rm = TRUE) #' # -> #' starwars %>% summarise(across(c("height", "mass"), ~ mean(.x, na.rm = TRUE))) #' #' # You can also supply selection helpers to _at() functions but you have #' # to quote them with vars(): #' starwars %>% #' summarise_at(vars(height:mass), mean, na.rm = TRUE) #' # -> #' starwars %>% #' summarise(across(height:mass, ~ mean(.x, na.rm = TRUE))) #' #' # The _if() variants apply a predicate function (a function that #' # returns TRUE or FALSE) to determine the relevant subset of #' # columns. Here we apply mean() to the numeric columns: #' starwars %>% #' summarise_if(is.numeric, mean, na.rm = TRUE) #' starwars %>% #' summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) #' #' by_species <- iris %>% #' group_by(Species) #' #' # If you want to apply multiple transformations, pass a list of #' # functions. 
When there are multiple functions, they create new #' # variables instead of modifying the variables in place: #' by_species %>% #' summarise_all(list(min, max)) #' # -> #' by_species %>% #' summarise(across(everything(), list(min = min, max = max))) #' @export #' @keywords internal summarise_all <- function(.tbl, .funs, ...) { lifecycle::signal_stage("superseded", "summarise_all()") funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "summarise_all") summarise(.tbl, !!!funs) } #' @rdname summarise_all #' @export summarise_if <- function(.tbl, .predicate, .funs, ...) { lifecycle::signal_stage("superseded", "summarise_if()") funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), ..., .caller = "summarise_if") summarise(.tbl, !!!funs) } #' @rdname summarise_all #' @export summarise_at <- function(.tbl, .vars, .funs, ..., .cols = NULL) { lifecycle::signal_stage("superseded", "summarise_at()") .vars <- check_dot_cols(.vars, .cols) funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), ..., .caller = "summarise_at") summarise(.tbl, !!!funs) } #' @rdname summarise_all #' @export summarize_all <- summarise_all #' @rdname summarise_all #' @export summarize_if <- summarise_if #' @rdname summarise_all #' @export summarize_at <- summarise_at #' Mutate multiple columns #' #' @description #' `r lifecycle::badge("superseded")` #' #' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of #' [pick()] or [across()] in an existing verb. See `vignette("colwise")` for #' details. #' #' The [scoped] variants of [mutate()] and [transmute()] make it easy to apply #' the same transformation to multiple variables. There are three variants: #' * _all affects every variable #' * _at affects variables selected with a character vector or vars() #' * _if affects variables selected with a predicate function: #' #' @inheritParams scoped #' @inheritParams summarise_all #' @return A data frame. 
By default, the newly created columns have the shortest #' names needed to uniquely identify the output. To force inclusion of a name, #' even when not needed, name the input (see examples for details). #' @seealso [The other scoped verbs][scoped], [vars()] #' #' @section Grouping variables: #' #' If applied on a grouped tibble, these operations are *not* applied #' to the grouping variables. The behaviour depends on whether the #' selection is **implicit** (`all` and `if` selections) or #' **explicit** (`at` selections). #' #' * Grouping variables covered by explicit selections in #' `mutate_at()` and `transmute_at()` are always an error. Add #' `-group_cols()` to the [vars()] selection to avoid this: #' #' ``` #' data %>% mutate_at(vars(-group_cols(), ...), myoperation) #' ``` #' #' Or remove `group_vars()` from the character vector of column names: #' #' ``` #' nms <- setdiff(nms, group_vars(data)) #' data %>% mutate_at(vars, myoperation) #' ``` #' #' * Grouping variables covered by implicit selections are ignored by #' `mutate_all()`, `transmute_all()`, `mutate_if()`, and #' `transmute_if()`. #' #' @inheritSection summarise_all Naming #' #' @examples #' iris <- as_tibble(iris) #' #' # All variants can be passed functions and additional arguments, #' # purrr-style. The _at() variants directly support strings. 
Here #' # we'll scale the variables `height` and `mass`: #' scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm) #' starwars %>% mutate_at(c("height", "mass"), scale2) #' # -> #' starwars %>% mutate(across(c("height", "mass"), scale2)) #' #' # You can pass additional arguments to the function: #' starwars %>% mutate_at(c("height", "mass"), scale2, na.rm = TRUE) #' starwars %>% mutate_at(c("height", "mass"), ~scale2(., na.rm = TRUE)) #' # -> #' starwars %>% mutate(across(c("height", "mass"), ~ scale2(.x, na.rm = TRUE))) #' #' # You can also supply selection helpers to _at() functions but you have #' # to quote them with vars(): #' iris %>% mutate_at(vars(matches("Sepal")), log) #' iris %>% mutate(across(matches("Sepal"), log)) #' #' # The _if() variants apply a predicate function (a function that #' # returns TRUE or FALSE) to determine the relevant subset of #' # columns. Here we divide all the numeric columns by 100: #' starwars %>% mutate_if(is.numeric, scale2, na.rm = TRUE) #' starwars %>% mutate(across(where(is.numeric), ~ scale2(.x, na.rm = TRUE))) #' #' # mutate_if() is particularly useful for transforming variables from #' # one type to another #' iris %>% mutate_if(is.factor, as.character) #' iris %>% mutate_if(is.double, as.integer) #' # -> #' iris %>% mutate(across(where(is.factor), as.character)) #' iris %>% mutate(across(where(is.double), as.integer)) #' #' # Multiple transformations ---------------------------------------- #' #' # If you want to apply multiple transformations, pass a list of #' # functions. 
When there are multiple functions, they create new #' # variables instead of modifying the variables in place: #' iris %>% mutate_if(is.numeric, list(scale2, log)) #' iris %>% mutate_if(is.numeric, list(~scale2(.), ~log(.))) #' iris %>% mutate_if(is.numeric, list(scale = scale2, log = log)) #' # -> #' iris %>% #' as_tibble() %>% #' mutate(across(where(is.numeric), list(scale = scale2, log = log))) #' #' # When there's only one function in the list, it modifies existing #' # variables in place. Give it a name to instead create new variables: #' iris %>% mutate_if(is.numeric, list(scale2)) #' iris %>% mutate_if(is.numeric, list(scale = scale2)) #' @export #' @keywords internal mutate_all <- function(.tbl, .funs, ...) { lifecycle::signal_stage("superseded", "mutate_all()") check_grouped(.tbl, "mutate", "all", alt = TRUE) funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "mutate_all") mutate(.tbl, !!!funs) } #' @rdname mutate_all #' @export mutate_if <- function(.tbl, .predicate, .funs, ...) { lifecycle::signal_stage("superseded", "mutate_if()") check_grouped(.tbl, "mutate", "if") funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), ..., .caller = "mutate_if") mutate(.tbl, !!!funs) } #' @rdname mutate_all #' @export mutate_at <- function(.tbl, .vars, .funs, ..., .cols = NULL) { lifecycle::signal_stage("superseded", "mutate_at()") .vars <- check_dot_cols(.vars, .cols) funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "mutate_at") mutate(.tbl, !!!funs) } #' @rdname mutate_all #' @export transmute_all <- function(.tbl, .funs, ...) { lifecycle::signal_stage("superseded", "transmute_all()") check_grouped(.tbl, "transmute", "all", alt = TRUE) funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "transmute_all") transmute(.tbl, !!!funs) } #' @rdname mutate_all #' @export transmute_if <- function(.tbl, .predicate, .funs, ...) 
{ lifecycle::signal_stage("superseded", "transmute_if()") check_grouped(.tbl, "transmute", "if") funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), ..., .caller = "transmute_if") transmute(.tbl, !!!funs) } #' @rdname mutate_all #' @export transmute_at <- function(.tbl, .vars, .funs, ..., .cols = NULL) { lifecycle::signal_stage("superseded", "transmute_at()") .vars <- check_dot_cols(.vars, .cols) funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "transmute_at") transmute(.tbl, !!!funs) } # Helpers ----------------------------------------------------------------- manip_all <- function(.tbl, .funs, .quo, .env, ..., .include_group_vars = FALSE, .caller, error_call = caller_env()) { if (.include_group_vars) { syms <- syms(tbl_vars(.tbl)) } else { syms <- syms(tbl_nongroup_vars(.tbl)) } funs <- as_fun_list(.funs, .env, ..., .caller = .caller, error_call = error_call, .user_env = caller_env(2)) manip_apply_syms(funs, syms, .tbl) } manip_if <- function(.tbl, .predicate, .funs, .quo, .env, ..., .include_group_vars = FALSE, .caller, error_call = caller_env()) { vars <- tbl_if_syms(.tbl, .predicate, .env, .include_group_vars = .include_group_vars, error_call = error_call) funs <- as_fun_list(.funs, .env, ..., .caller = .caller, error_call = error_call, .user_env = caller_env(2)) manip_apply_syms(funs, vars, .tbl) } manip_at <- function(.tbl, .vars, .funs, .quo, .env, ..., .include_group_vars = FALSE, .caller, error_call = caller_env()) { syms <- tbl_at_syms(.tbl, .vars, .include_group_vars = .include_group_vars, error_call = error_call) funs <- as_fun_list(.funs, .env, ..., .caller = .caller, error_call = error_call, .user_env = caller_env(2)) manip_apply_syms(funs, syms, .tbl) } check_grouped <- function(tbl, verb, suffix, alt = FALSE) { if (is_grouped_df(tbl)) { if (alt) { alt_line <- sprintf("Use `%s_at(df, vars(-group_cols()), myoperation)` to silence the message.", verb) } else { alt_line <- 
chr()
    }
    inform(c(
      sprintf("`%s_%s()` ignored the following grouping variables:", verb, suffix),
      set_names(fmt_cols(group_vars(tbl)), "*"),
      "i" = alt_line
    ))
  }
}

# Handle the deprecated `.cols` argument of the `_at()` verbs.
# Prefers `.vars` when both are supplied, informing about the rename.
check_dot_cols <- function(vars, cols) {
  if (is_null(cols)) {
    vars
  } else {
    inform("`.cols` has been renamed and is deprecated, please use `.vars`")
    if (missing(vars)) cols else vars
  }
}

# Build the `syms` x `funs` grid of calls that the scoped verbs splice into
# mutate()/summarise()/transmute(), and name the results following the rules
# documented in ?summarise_all ("Naming" section).
manip_apply_syms <- function(funs, syms, tbl) {
  out <- vector("list", length(syms) * length(funs))
  dim(out) <- c(length(syms), length(funs))
  syms_position <- match(as.character(syms), tbl_vars(tbl))
  for (i in seq_along(syms)) {
    pos <- syms_position[i]
    for (j in seq_along(funs)) {
      fun <- funs[[j]]
      if (is_quosure(fun)) {
        # Lambda-style function: substitute the pronoun `.` with the column.
        out[[i, j]] <- expr_substitute(fun, quote(.), syms[[i]])
      } else {
        out[[i, j]] <- call2(fun, syms[[i]])
      }
      # Remember the column's position so callers can reorder the output.
      attr(out[[i, j]], "position") <- pos
    }
  }
  dim(out) <- NULL

  # Use symbols as default names
  unnamed <- !have_name(syms)
  names(syms)[unnamed] <- map_chr(syms[unnamed], as_string)

  if (length(funs) == 1 && !attr(funs, "have_name")) {
    # Single unnamed function: the columns keep the variable names.
    names(out) <- names(syms)
  } else {
    nms <- names(funs) %||% rep("", length(funs))
    # FIX: the second comparison was `nms == ""` (a duplicate of the first);
    # it must match the "<fn>" label that as_fun_list() gives unnamed
    # anonymous functions, so they get "fn1", "fn2", ... names.
    is_fun <- nms == "" | nms == "<fn>"
    nms[is_fun] <- paste0("fn", seq_len(sum(is_fun)))
    nms <- unique_names(nms, quiet = TRUE)
    names(funs) <- nms
    if (length(syms) == 1 && all(unnamed)) {
      # Single unnamed variable: the columns take the function names.
      names(out) <- names(funs)
    } else {
      # Otherwise combine "<var>_<fun>" for every pair.
      syms_names <- ifelse(unnamed, map_chr(syms, as_string), names(syms))
      grid <- expand.grid(var = syms_names, call = names(funs))
      names(out) <- paste(grid$var, grid$call, sep = "_")
    }
  }
  out
}
dplyr/R/data-storms.R0000644000176200001440000000374014366556340014206 0ustar liggesusers#' Storm tracks data
#'
#' This dataset is the NOAA Atlantic hurricane database best track data,
#' <https://www.nhc.noaa.gov/data/#hurdat>. The data includes the positions and
#' attributes of storms from `r min(storms$year)`-`r max(storms$year)`. Storms
#' from 1979 onward are measured every six hours during the lifetime of the
#' storm. Storms in earlier years have some missing data.
#' #' @seealso The script to create the storms data set: #' #' #' @format A tibble with `r format(nrow(storms), big.mark = ",")` observations #' and `r ncol(storms)` variables: #' \describe{ #' \item{name}{Storm Name} #' \item{year,month,day}{Date of report} #' \item{hour}{Hour of report (in UTC)} #' \item{lat,long}{Location of storm center} #' \item{status}{Storm classification (Tropical Depression, Tropical Storm, #' or Hurricane)} #' \item{category}{Saffir-Simpson hurricane category calculated from wind speed. #' \itemize{ #' \item `NA`: Not a hurricane #' \item 1: 64+ knots #' \item 2: 83+ knots #' \item 3: 96+ knots #' \item 4: 113+ knots #' \item 5: 137+ knots #' } #' } #' \item{wind}{storm's maximum sustained wind speed (in knots)} #' \item{pressure}{Air pressure at the storm's center (in millibars)} #' \item{tropicalstorm_force_diameter}{Diameter (in nautical miles) of the #' area experiencing tropical storm strength winds (34 knots or above). #' Only available starting in 2004.} #' \item{hurricane_force_diameter}{Diameter (in nautical miles) of the area #' experiencing hurricane strength winds (64 knots or above). Only available #' starting in 2004.} #' } #' @examples #' storms #' #' # Show a few recent storm paths #' if (requireNamespace("ggplot2", quietly = TRUE)) { #' library(ggplot2) #' storms %>% #' filter(year >= 2000) %>% #' ggplot(aes(long, lat, color = paste(year, name))) + #' geom_path(show.legend = FALSE) + #' facet_wrap(~year) #' } #' #' storms "storms" dplyr/R/data-starwars.R0000644000176200001440000000211014525503021014474 0ustar liggesusers#' Starwars characters #' #' The original data, from SWAPI, the Star Wars API, , has been revised #' to reflect additional research into gender and sex determinations of characters. 
#' #' @format A tibble with 87 rows and 14 variables: #' \describe{ #' \item{name}{Name of the character} #' \item{height}{Height (cm)} #' \item{mass}{Weight (kg)} #' \item{hair_color,skin_color,eye_color}{Hair, skin, and eye colors} #' \item{birth_year}{Year born (BBY = Before Battle of Yavin)} #' \item{sex}{The biological sex of the character, namely male, female, hermaphroditic, or none (as in the case for Droids).} #' \item{gender}{The gender role or gender identity of the character as determined by their personality or the way they were programmed (as in the case for Droids).} #' \item{homeworld}{Name of homeworld} #' \item{species}{Name of species} #' \item{films}{List of films the character appeared in} #' \item{vehicles}{List of vehicles the character has piloted} #' \item{starships}{List of starships the character has piloted} #' } #' @examples #' starwars "starwars" dplyr/R/join.R0000644000176200001440000007353014406402754012705 0ustar liggesusers#' Mutating joins #' #' @description #' Mutating joins add columns from `y` to `x`, matching observations based on #' the keys. There are four mutating joins: the inner join, and the three outer #' joins. #' #' ## Inner join #' #' An `inner_join()` only keeps observations from `x` that have a matching key #' in `y`. #' #' The most important property of an inner join is that unmatched rows in either #' input are not included in the result. This means that generally inner joins #' are not appropriate in most analyses, because it is too easy to lose #' observations. #' #' ## Outer joins #' #' The three outer joins keep observations that appear in at least one of the #' data frames: #' #' * A `left_join()` keeps all observations in `x`. #' #' * A `right_join()` keeps all observations in `y`. #' #' * A `full_join()` keeps all observations in `x` and `y`. #' #' @section Many-to-many relationships: #' #' By default, dplyr guards against many-to-many relationships in equality joins #' by throwing a warning. 
These occur when both of the following are true: #' #' - A row in `x` matches multiple rows in `y`. #' - A row in `y` matches multiple rows in `x`. #' #' This is typically surprising, as most joins involve a relationship of #' one-to-one, one-to-many, or many-to-one, and is often the result of an #' improperly specified join. Many-to-many relationships are particularly #' problematic because they can result in a Cartesian explosion of the number of #' rows returned from the join. #' #' If a many-to-many relationship is expected, silence this warning by #' explicitly setting `relationship = "many-to-many"`. #' #' In production code, it is best to preemptively set `relationship` to whatever #' relationship you expect to exist between the keys of `x` and `y`, as this #' forces an error to occur immediately if the data doesn't align with your #' expectations. #' #' Inequality joins typically result in many-to-many relationships by nature, so #' they don't warn on them by default, but you should still take extra care when #' specifying an inequality join, because they also have the capability to #' return a large number of rows. #' #' Rolling joins don't warn on many-to-many relationships either, but many #' rolling joins follow a many-to-one relationship, so it is often useful to #' set `relationship = "many-to-one"` to enforce this. #' #' Note that in SQL, most database providers won't let you specify a #' many-to-many relationship between two tables, instead requiring that you #' create a third _junction table_ that results in two one-to-many relationships #' instead. #' #' @return #' An object of the same type as `x` (including the same groups). The order of #' the rows and columns of `x` is preserved as much as possible. The output has #' the following properties: #' #' * The rows are affect by the join type. #' * `inner_join()` returns matched `x` rows. #' * `left_join()` returns all `x` rows. 
* `right_join()` returns matched `x` rows, followed by unmatched `y` rows.
#' #' To join by multiple variables, use a [join_by()] specification with #' multiple expressions. For example, `join_by(a == b, c == d)` will match #' `x$a` to `y$b` and `x$c` to `y$d`. If the column names are the same between #' `x` and `y`, you can shorten this by listing only the variable names, like #' `join_by(a, c)`. #' #' [join_by()] can also be used to perform inequality, rolling, and overlap #' joins. See the documentation at [?join_by][join_by()] for details on #' these types of joins. #' #' For simple equality joins, you can alternatively specify a character vector #' of variable names to join by. For example, `by = c("a", "b")` joins `x$a` #' to `y$a` and `x$b` to `y$b`. If variable names differ between `x` and `y`, #' use a named character vector like `by = c("x_a" = "y_a", "x_b" = "y_b")`. #' #' To perform a cross-join, generating all combinations of `x` and `y`, see #' [cross_join()]. #' @param copy If `x` and `y` are not from the same data source, #' and `copy` is `TRUE`, then `y` will be copied into the #' same src as `x`. This allows you to join tables across srcs, but #' it is a potentially expensive operation so you must opt into it. #' @param suffix If there are non-joined duplicate variables in `x` and #' `y`, these suffixes will be added to the output to disambiguate them. #' Should be a character vector of length 2. #' @param keep Should the join keys from both `x` and `y` be preserved in the #' output? #' - If `NULL`, the default, joins on equality retain only the keys from `x`, #' while joins on inequality retain the keys from both inputs. #' - If `TRUE`, all keys from both inputs are retained. #' - If `FALSE`, only keys from `x` are retained. For right and full joins, #' the data in key columns corresponding to rows that only exist in `y` are #' merged into the key columns from `x`. Can't be used when joining on #' inequality conditions. #' @param ... Other parameters passed onto methods. 
#' @param na_matches Should two `NA` or two `NaN` values match? #' - `"na"`, the default, treats two `NA` or two `NaN` values as equal, like #' `%in%`, [match()], and [merge()]. #' - `"never"` treats two `NA` or two `NaN` values as different, and will #' never match them together or to any other values. This is similar to joins #' for database sources and to `base::merge(incomparables = NA)`. #' @param multiple Handling of rows in `x` with multiple matches in `y`. #' For each row of `x`: #' - `"all"`, the default, returns every match detected in `y`. This is the #' same behavior as SQL. #' - `"any"` returns one match detected in `y`, with no guarantees on which #' match will be returned. It is often faster than `"first"` and `"last"` #' if you just need to detect if there is at least one match. #' - `"first"` returns the first match detected in `y`. #' - `"last"` returns the last match detected in `y`. #' @param unmatched How should unmatched keys that would result in dropped rows #' be handled? #' - `"drop"` drops unmatched keys from the result. #' - `"error"` throws an error if unmatched keys are detected. #' #' `unmatched` is intended to protect you from accidentally dropping rows #' during a join. It only checks for unmatched keys in the input that could #' potentially drop rows. #' - For left joins, it checks `y`. #' - For right joins, it checks `x`. #' - For inner joins, it checks both `x` and `y`. In this case, `unmatched` is #' also allowed to be a character vector of length 2 to specify the behavior #' for `x` and `y` independently. #' @param relationship Handling of the expected relationship between the keys of #' `x` and `y`. If the expectations chosen from the list below are #' invalidated, an error is thrown. #' #' - `NULL`, the default, doesn't expect there to be any relationship between #' `x` and `y`. 
However, for equality joins it will check for a many-to-many #' relationship (which is typically unexpected) and will warn if one occurs, #' encouraging you to either take a closer look at your inputs or make this #' relationship explicit by specifying `"many-to-many"`. #' #' See the _Many-to-many relationships_ section for more details. #' #' - `"one-to-one"` expects: #' - Each row in `x` matches at most 1 row in `y`. #' - Each row in `y` matches at most 1 row in `x`. #' #' - `"one-to-many"` expects: #' - Each row in `y` matches at most 1 row in `x`. #' #' - `"many-to-one"` expects: #' - Each row in `x` matches at most 1 row in `y`. #' #' - `"many-to-many"` doesn't perform any relationship checks, but is provided #' to allow you to be explicit about this relationship if you know it #' exists. #' #' `relationship` doesn't handle cases where there are zero matches. For that, #' see `unmatched`. #' @family joins #' @examples #' band_members %>% inner_join(band_instruments) #' band_members %>% left_join(band_instruments) #' band_members %>% right_join(band_instruments) #' band_members %>% full_join(band_instruments) #' #' # To suppress the message about joining variables, supply `by` #' band_members %>% inner_join(band_instruments, by = join_by(name)) #' # This is good practice in production code #' #' # Use an equality expression if the join variables have different names #' band_members %>% full_join(band_instruments2, by = join_by(name == artist)) #' # By default, the join keys from `x` and `y` are coalesced in the output; use #' # `keep = TRUE` to keep the join keys from both `x` and `y` #' band_members %>% #' full_join(band_instruments2, by = join_by(name == artist), keep = TRUE) #' #' # If a row in `x` matches multiple rows in `y`, all the rows in `y` will be #' # returned once for each matching row in `x`. 
#' df1 <- tibble(x = 1:3) #' df2 <- tibble(x = c(1, 1, 2), y = c("first", "second", "third")) #' df1 %>% left_join(df2) #' #' # If a row in `y` also matches multiple rows in `x`, this is known as a #' # many-to-many relationship, which is typically a result of an improperly #' # specified join or some kind of messy data. In this case, a warning is #' # thrown by default: #' df3 <- tibble(x = c(1, 1, 1, 3)) #' df3 %>% left_join(df2) #' #' # In the rare case where a many-to-many relationship is expected, set #' # `relationship = "many-to-many"` to silence this warning #' df3 %>% left_join(df2, relationship = "many-to-many") #' #' # Use `join_by()` with a condition other than `==` to perform an inequality #' # join. Here we match on every instance where `df1$x > df2$x`. #' df1 %>% left_join(df2, join_by(x > x)) #' #' # By default, NAs match other NAs so that there are two #' # rows in the output of this join: #' df1 <- data.frame(x = c(1, NA), y = 2) #' df2 <- data.frame(x = c(1, NA), z = 3) #' left_join(df1, df2) #' #' # You can optionally request that NAs don't match, giving a #' # a result that more closely resembles SQL joins #' left_join(df1, df2, na_matches = "never") #' @aliases join join.data.frame #' @name mutate-joins NULL #' @export #' @rdname mutate-joins inner_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL) { UseMethod("inner_join") } #' @export #' @rdname mutate-joins inner_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", unmatched = "drop", relationship = NULL) { check_dots_empty0(...) 
y <- auto_copy(x, y, copy = copy) join_mutate( x = x, y = y, by = by, type = "inner", suffix = suffix, na_matches = na_matches, keep = keep, multiple = multiple, unmatched = unmatched, relationship = relationship, user_env = caller_env() ) } #' @export #' @rdname mutate-joins left_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL) { UseMethod("left_join") } #' @export #' @rdname mutate-joins left_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", unmatched = "drop", relationship = NULL) { check_dots_empty0(...) y <- auto_copy(x, y, copy = copy) join_mutate( x = x, y = y, by = by, type = "left", suffix = suffix, na_matches = na_matches, keep = keep, multiple = multiple, unmatched = unmatched, relationship = relationship, user_env = caller_env() ) } #' @export #' @rdname mutate-joins right_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL) { UseMethod("right_join") } #' @export #' @rdname mutate-joins right_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", unmatched = "drop", relationship = NULL) { check_dots_empty0(...) y <- auto_copy(x, y, copy = copy) join_mutate( x = x, y = y, by = by, type = "right", suffix = suffix, na_matches = na_matches, keep = keep, multiple = multiple, unmatched = unmatched, relationship = relationship, user_env = caller_env() ) } #' @export #' @rdname mutate-joins full_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL) { UseMethod("full_join") } #' @export #' @rdname mutate-joins full_join.data.frame <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = NULL, na_matches = c("na", "never"), multiple = "all", relationship = NULL) { check_dots_empty0(...) 
y <- auto_copy(x, y, copy = copy) join_mutate( x = x, y = y, by = by, type = "full", suffix = suffix, na_matches = na_matches, keep = keep, multiple = multiple, # All keys from both inputs are retained. Erroring never makes sense. unmatched = "drop", relationship = relationship, user_env = caller_env() ) } #' Filtering joins #' #' @description #' Filtering joins filter rows from `x` based on the presence or absence #' of matches in `y`: #' #' * `semi_join()` return all rows from `x` with a match in `y`. #' * `anti_join()` return all rows from `x` with**out** a match in `y`. #' #' @param x,y A pair of data frames, data frame extensions (e.g. a tibble), or #' lazy data frames (e.g. from dbplyr or dtplyr). See *Methods*, below, for #' more details. #' @inheritParams left_join #' @return #' An object of the same type as `x`. The output has the following properties: #' #' * Rows are a subset of the input, but appear in the same order. #' * Columns are not modified. #' * Data frame attributes are preserved. #' * Groups are taken from `x`. The number of groups may be reduced. #' @section Methods: #' These function are **generic**s, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' Methods available in currently loaded packages: #' #' * `semi_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("semi_join")}. #' * `anti_join()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("anti_join")}. 
#' @family joins #' @examples #' # "Filtering" joins keep cases from the LHS #' band_members %>% semi_join(band_instruments) #' band_members %>% anti_join(band_instruments) #' #' # To suppress the message about joining variables, supply `by` #' band_members %>% semi_join(band_instruments, by = join_by(name)) #' # This is good practice in production code #' @name filter-joins NULL #' @export #' @rdname filter-joins semi_join <- function(x, y, by = NULL, copy = FALSE, ...) { UseMethod("semi_join") } #' @export #' @rdname filter-joins semi_join.data.frame <- function(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never")) { check_dots_empty0(...) y <- auto_copy(x, y, copy = copy) join_filter(x, y, by = by, type = "semi", na_matches = na_matches, user_env = caller_env()) } #' @export #' @rdname filter-joins anti_join <- function(x, y, by = NULL, copy = FALSE, ...) { UseMethod("anti_join") } #' @export #' @rdname filter-joins anti_join.data.frame <- function(x, y, by = NULL, copy = FALSE, ..., na_matches = c("na", "never")) { check_dots_empty0(...) y <- auto_copy(x, y, copy = copy) join_filter(x, y, by = by, type = "anti", na_matches = na_matches, user_env = caller_env()) } #' Nest join #' #' A nest join leaves `x` almost unchanged, except that it adds a new #' list-column, where each element contains the rows from `y` that match the #' corresponding row in `x`. #' #' # Relationship to other joins #' #' You can recreate many other joins from the result of a nest join: #' #' * [inner_join()] is a `nest_join()` plus [tidyr::unnest()]. #' * [left_join()] is a `nest_join()` plus `tidyr::unnest(keep_empty = TRUE)`. #' * [semi_join()] is a `nest_join()` plus a `filter()` where you check #' that every element of data has at least one row. #' * [anti_join()] is a `nest_join()` plus a `filter()` where you check that every #' element has zero rows. #' #' @param name The name of the list-column created by the join. If `NULL`, #' the default, the name of `y` is used. 
#' @param keep Should the new list-column contain join keys? The default #' will preserve the join keys for inequality joins. #' @return #' The output: #' * Is same type as `x` (including having the same groups). #' * Has exactly the same number of rows as `x`. #' * Contains all the columns of `x` in the same order with the same values. #' They are only modified (slightly) if `keep = FALSE`, when columns listed #' in `by` will be coerced to their common type across `x` and `y`. #' * Gains one new column called `{name}` on the far right, a list column #' containing data frames the same type as `y`. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_join")}. #' @inheritParams left_join #' @family joins #' @export #' @examples #' df1 <- tibble(x = 1:3) #' df2 <- tibble(x = c(2, 3, 3), y = c("a", "b", "c")) #' #' out <- nest_join(df1, df2) #' out #' out$df2 nest_join <- function(x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ...) { UseMethod("nest_join") } #' @export #' @rdname nest_join nest_join.data.frame <- function(x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ..., na_matches = c("na", "never"), unmatched = "drop") { check_dots_empty0(...) 
check_keep(keep) na_matches <- check_na_matches(na_matches) if (is.null(name)) { name <- as_label(enexpr(y)) } else { check_string(name) } x_names <- tbl_vars(x) y_names <- tbl_vars(y) if (is_cross_by(by)) { warn_join_cross_by() by <- new_join_by() cross <- TRUE } else { cross <- FALSE } if (is_null(by)) { by <- join_by_common(x_names, y_names) } else { by <- as_join_by(by) } vars <- join_cols(x_names, y_names, by = by, suffix = c("", ""), keep = keep) y <- auto_copy(x, y, copy = copy) x_in <- as_tibble(x, .name_repair = "minimal") y_in <- as_tibble(y, .name_repair = "minimal") x_key <- set_names(x_in[vars$x$key], names(vars$x$key)) y_key <- set_names(y_in[vars$y$key], names(vars$x$key)) args <- join_cast_common(x_key, y_key, vars) x_key <- args$x y_key <- args$y condition <- by$condition filter <- by$filter # We always want to retain all of the matches. We never experience a Cartesian # explosion because `nrow(x) == nrow(out)` is an invariant of `nest_join()`, # and the whole point of `nest_join()` is to nest all of the matches for that # row of `x` (#6392). multiple <- "all" # Will be set to `"none"` in `join_rows()`. Because we can't have a Cartesian # explosion, we don't care about many-to-many relationships. 
relationship <- NULL rows <- join_rows( x_key = x_key, y_key = y_key, type = "nest", na_matches = na_matches, condition = condition, filter = filter, cross = cross, multiple = multiple, unmatched = unmatched, relationship = relationship, user_env = caller_env() ) y_loc <- vec_split(rows$y, rows$x)$val out <- set_names(x_in[vars$x$out], names(vars$x$out)) # Modify all columns in one step so that we only need to re-group once new_cols <- vec_cast(out[names(x_key)], x_key) y_out <- set_names(y_in[vars$y$out], names(vars$y$out)) y_out <- map(y_loc, vec_slice, x = y_out) y_out <- map(y_out, dplyr_reconstruct, template = y) new_cols[[name]] <- y_out out <- dplyr_col_modify(out, new_cols) dplyr_reconstruct(out, x) } # helpers ----------------------------------------------------------------- join_mutate <- function(x, y, by, type, ..., suffix = c(".x", ".y"), na_matches = "na", keep = NULL, multiple = "all", unmatched = "drop", relationship = NULL, error_call = caller_env(), user_env = caller_env()) { check_dots_empty0(...) 
na_matches <- check_na_matches(na_matches, error_call = error_call) check_keep(keep, error_call = error_call) x_names <- tbl_vars(x) y_names <- tbl_vars(y) if (is_cross_by(by)) { warn_join_cross_by(env = error_call, user_env = user_env) by <- new_join_by() cross <- TRUE } else { cross <- FALSE } if (is_null(by)) { by <- join_by_common(x_names, y_names, error_call = error_call) } else { by <- as_join_by(by, error_call = error_call) } vars <- join_cols( x_names = x_names, y_names = y_names, by = by, suffix = suffix, keep = keep, error_call = error_call ) x_in <- as_tibble(x, .name_repair = "minimal") y_in <- as_tibble(y, .name_repair = "minimal") x_key <- set_names(x_in[vars$x$key], names(vars$x$key)) y_key <- set_names(y_in[vars$y$key], names(vars$x$key)) args <- join_cast_common(x_key, y_key, vars, error_call = error_call) x_key <- args$x y_key <- args$y condition <- by$condition filter <- by$filter rows <- join_rows( x_key = x_key, y_key = y_key, type = type, na_matches = na_matches, condition = condition, filter = filter, cross = cross, multiple = multiple, unmatched = unmatched, relationship = relationship, error_call = error_call, user_env = user_env ) x_slicer <- rows$x y_slicer <- rows$y x_out <- set_names(x_in[vars$x$out], names(vars$x$out)) y_out <- set_names(y_in[vars$y$out], names(vars$y$out)) out <- vec_slice(x_out, x_slicer) out[names(y_out)] <- vec_slice(y_out, y_slicer) if (!is_true(keep)) { if (is_null(keep)) { merge <- by$x[by$condition == "=="] } else if (is_false(keep)) { # Won't ever contain non-equi conditions merge <- by$x } # Keys have already been cast to the common type x_merge <- x_key[merge] out[merge] <- vec_cast( x = out[merge], to = x_merge, call = error_call ) if ((type == "right" || type == "full") && anyNA(x_slicer)) { y_merge <- y_key[merge] new_rows <- which(is.na(x_slicer)) y_replacer <- y_slicer[new_rows] out[new_rows, merge] <- vec_slice(y_merge, y_replacer) } } dplyr_reconstruct(out, x) } join_filter <- function(x, y, by, type, 
..., na_matches = c("na", "never"), error_call = caller_env(), user_env = caller_env()) { check_dots_empty0(...) na_matches <- check_na_matches(na_matches, error_call = error_call) x_names <- tbl_vars(x) y_names <- tbl_vars(y) if (is_cross_by(by)) { warn_join_cross_by(env = error_call, user_env = user_env) by <- new_join_by() cross <- TRUE } else { cross <- FALSE } if (is_null(by)) { by <- join_by_common(x_names, y_names, error_call = error_call) } else { by <- as_join_by(by, error_call = error_call) } vars <- join_cols(x_names, y_names, by = by, error_call = error_call) x_in <- as_tibble(x, .name_repair = "minimal") y_in <- as_tibble(y, .name_repair = "minimal") x_key <- set_names(x_in[vars$x$key], names(vars$x$key)) y_key <- set_names(y_in[vars$y$key], names(vars$x$key)) args <- join_cast_common(x_key, y_key, vars, error_call = error_call) x_key <- args$x y_key <- args$y condition <- by$condition filter <- by$filter # We only care about whether or not any matches exist multiple <- "any" # Will be set to `"none"` in `join_rows()`. Because `multiple = "any"`, that # means many-to-many relationships aren't possible. relationship <- NULL # Since we are actually testing the presence of matches, it doesn't make # sense to ever error on unmatched values. 
unmatched <- "drop" rows <- join_rows( x_key = x_key, y_key = y_key, type = type, na_matches = na_matches, condition = condition, filter = filter, cross = cross, multiple = multiple, unmatched = unmatched, relationship = relationship, error_call = error_call, user_env = user_env ) if (type == "semi") { # Unmatched needles and propagated missing needles will already be dropped idx <- rows$x } else { # Treat both unmatched needles and propagated missing needles as no-match no_match <- is.na(rows$y) idx <- rows$x[no_match] } dplyr_row_slice(x, idx) } check_na_matches <- function(na_matches, ..., error_call = caller_env()) { if (isNamespaceLoaded("pkgconfig")) { conf <- asNamespace("pkgconfig")$get_config("dplyr::na_matches") if (!is.null(conf)) { warn(c( "`dplyr::na_matches` pkgconfig options is now ignored.", "Please set `na_matches` directly." )) } } arg_match0( arg = na_matches, values = c("na", "never"), error_call = error_call ) } check_keep <- function(keep, error_call = caller_env()) { if (!is_bool(keep) && !is.null(keep)) { abort( glue("`keep` must be `TRUE`, `FALSE`, or `NULL`, not {obj_type_friendly(keep)}."), call = error_call) } } is_cross_by <- function(x) { if (is_character(x, n = 0L)) { # `character()` or `named character()` return(TRUE) } if (is_list(x, n = 2L) && is_character(x[["x"]], n = 0L) && is_character(x[["y"]], n = 0L)) { # `list(x = character(), y = character())` # (possibly with named empty character elements) return(TRUE) } FALSE } warn_join_cross_by <- function(env = caller_env(), user_env = caller_env(2)) { lifecycle::deprecate_soft( when = "1.1.0", what = I("Using `by = character()` to perform a cross join"), with = "cross_join()", env = env, user_env = user_env ) } dplyr/R/colwise-filter.R0000644000176200001440000001263614366556340014704 0ustar liggesusers#' Filter within a selection of variables #' #' @description #' `r lifecycle::badge("superseded")` #' #' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of #' 
[if_all()] or [if_any()] in an existing verb. See `vignette("colwise")` for #' details. #' #' These [scoped] filtering verbs apply a predicate expression to a #' selection of variables. The predicate expression should be quoted #' with [all_vars()] or [any_vars()] and should mention the pronoun #' `.` to refer to variables. #' #' @inheritParams scoped #' @param .vars_predicate A quoted predicate expression as returned by #' [all_vars()] or [any_vars()]. #' #' Can also be a function or purrr-like formula. In this case, the #' intersection of the results is taken by default and there's #' currently no way to request the union. #' @param .preserve when `FALSE` (the default), the grouping structure #' is recalculated based on the resulting data, otherwise it is kept as is. #' @export #' #' @section Grouping variables: #' #' The grouping variables that are part of the selection are taken #' into account to determine filtered rows. #' #' @keywords internal #' @examples #' # While filter() accepts expressions with specific variables, the #' # scoped filter verbs take an expression with the pronoun `.` and #' # replicate it over all variables. This expression should be quoted #' # with all_vars() or any_vars(): #' all_vars(is.na(.)) #' any_vars(is.na(.)) #' #' #' # You can take the intersection of the replicated expressions: #' filter_all(mtcars, all_vars(. > 150)) #' # -> #' filter(mtcars, if_all(everything(), ~ .x > 150)) #' #' # Or the union: #' filter_all(mtcars, any_vars(. > 150)) #' # -> #' filter(mtcars, if_any(everything(), ~ . > 150)) #' #' #' # You can vary the selection of columns on which to apply the #' # predicate. filter_at() takes a vars() specification: #' filter_at(mtcars, vars(starts_with("d")), any_vars((. %% 2) == 0)) #' # -> #' filter(mtcars, if_any(starts_with("d"), ~ (.x %% 2) == 0)) #' #' # And filter_if() selects variables with a predicate function: #' filter_if(mtcars, ~ all(floor(.) == .), all_vars(. 
!= 0)) #' # -> #' is_int <- function(x) all(floor(x) == x) #' filter(mtcars, if_all(where(is_int), ~ .x != 0)) filter_all <- function(.tbl, .vars_predicate, .preserve = FALSE) { lifecycle::signal_stage("superseded", "filter_all()") syms <- syms(tbl_vars(.tbl)) pred <- apply_filter_syms(.vars_predicate, syms, .tbl) filter(.tbl, !!pred, .preserve = .preserve) } #' @rdname filter_all #' @export filter_if <- function(.tbl, .predicate, .vars_predicate, .preserve = FALSE) { lifecycle::signal_stage("superseded", "filter_if()") syms <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE) pred <- apply_filter_syms(.vars_predicate, syms, .tbl) filter(.tbl, !!pred, .preserve = .preserve) } #' @rdname filter_all #' @export filter_at <- function(.tbl, .vars, .vars_predicate, .preserve = FALSE) { lifecycle::signal_stage("superseded", "filter_at()") syms <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE) pred <- apply_filter_syms(.vars_predicate, syms, .tbl) filter(.tbl, !!pred, .preserve = .preserve) } apply_filter_syms <- function(pred, syms, tbl, error_call = caller_env()) { if (is_empty(syms)) { msg <- glue("`.predicate` must match at least one column.") abort(msg, call = error_call) } joiner <- all_exprs if (inherits_any(pred, c("all_vars", "any_vars"))) { if (inherits(pred, "any_vars")) { joiner <- any_exprs } pred <- map(syms, function(sym) expr_substitute(pred, quote(.), sym)) } else if (is_bare_formula(pred) || is_function(pred)) { pred <- as_function(pred) pred <- map(syms, function(sym) call2(pred, sym)) } else { msg <- glue("`.vars_predicate` must be a function or a call to `all_vars()` or `any_vars()`, not {obj_type_friendly(pred)}.") abort(msg, call = error_call) } joiner(!!!pred) } ## Return the union or intersection of predicate expressions. ## ## `all_exprs()` and `any_exprs()` take predicate expressions and join them ## into a single predicate. They assume vectorised expressions by ## default and join them with `&` or `|`. 
Note that this will also ## work with scalar predicates, but if you want to be explicit you can ## set `.vectorised` to `FALSE` to join by `&&` or `||`. ## ## @param ... Predicate expressions. ## @param .vectorised If `TRUE`, predicates are joined with `&` or ## `|`. Otherwise, they are joined with `&&` or `||`. ## @return A [quosure][rlang::quo]. ## @export ## @examples ## all_exprs(cyl > 3, am == 1) ## any_exprs(cyl > 3, am == 1) ## any_exprs(cyl > 3, am == 1, .vectorised = FALSE) all_exprs <- function(..., .vectorised = TRUE) { op <- if (.vectorised) quote(`&`) else quote(`&&`) quo_reduce(..., .op = op) } ## @rdname all_exprs ## @export any_exprs <- function(..., .vectorised = TRUE) { op <- if (.vectorised) quote(`|`) else quote(`||`) quo_reduce(..., .op = op) } ## @param .op Can be a function or a quoted name of a function. If a ## quoted name, the default environment is the [base ## environment][rlang::base_env] unless you supply a ## [quosure][rlang::quo]. quo_reduce <- function(..., .op) { stopifnot(is_symbol(.op) || is_function(.op)) dots <- enquos(...) if (length(dots) == 1) { return(dots[[1]]) } op_quo <- as_quosure(.op, base_env()) op <- quo_get_expr(op_quo) expr <- reduce(dots, function(x, y) expr((!!op)((!!x), (!!y)))) new_quosure(expr, quo_get_env(op_quo)) } dplyr/R/vec-case-match.R0000644000176200001440000000234714406402754014524 0ustar liggesusersvec_case_match <- function(needles, haystacks, values, ..., needles_arg = "needles", haystacks_arg = "haystacks", values_arg = "values", default = NULL, default_arg = "default", ptype = NULL, call = current_env()) { check_dots_empty0(...) obj_check_vector(needles, arg = needles_arg, call = call) obj_check_list(haystacks, arg = haystacks_arg, call = call) list_check_all_vectors(haystacks, arg = haystacks_arg, call = call) haystacks <- vec_cast_common( !!!haystacks, .to = needles, .arg = haystacks_arg, .call = call ) # Could be more efficient in C. 
Build a dictionary on `needles` # once and then reuse it with each haystack conditions <- map(haystacks, vec_in, needles = needles) size <- vec_size(needles) vec_case_when( conditions = conditions, values = values, conditions_arg = "", values_arg = values_arg, default = default, default_arg = default_arg, ptype = ptype, size = size, call = call ) } dplyr/R/dbplyr.R0000644000176200001440000001215614366556340013245 0ustar liggesusers#' Database and SQL generics. #' #' The `sql_` generics are used to build the different types of SQL queries. #' The default implementations in dbplyr generates ANSI 92 compliant SQL. #' The `db_` generics execute actions on the database. The default #' implementations in dbplyr typically just call the standard DBI S4 #' method. #' #' A few backend methods do not call the standard DBI S4 methods including #' #' * `db_data_type()`: Calls [DBI::dbDataType()] for every field #' (e.g. data frame column) and returns a vector of corresponding SQL data #' types #' #' * `db_save_query()`: Builds and executes a #' `CREATE [TEMPORARY] TABLE
...` SQL command. #' #' * `db_create_index()`: Builds and executes a #' `CREATE INDEX ON
` SQL command. #' #' * `db_drop_table()`: Builds and executes a #' `DROP TABLE [IF EXISTS]
` SQL command. #' #' * `db_analyze()`: Builds and executes an #' `ANALYZE
` SQL command. #' #' Currently, [copy_to()] is the only user of `db_begin()`, `db_commit()`, #' `db_rollback()`, `db_write_table()`, `db_create_indexes()`, `db_drop_table()` and #' `db_analyze()`. If you find yourself overriding many of these #' functions it may suggest that you should just override `copy_to()` #' instead. #' #' `db_create_table()` and `db_insert_into()` have been deprecated #' in favour of `db_write_table()`. #' #' @return Usually a logical value indicating success. Most failures should generate #' an error. However, `db_has_table()` should return `NA` if #' temporary tables cannot be listed with [DBI::dbListTables()] (due to backend #' API limitations for example). As a result, you methods will rely on the #' backend to throw an error if a table exists when it shouldn't. #' @name backend_dbplyr #' @param con A database connection. #' @keywords internal NULL #' @name backend_dbplyr #' @export db_desc <- function(x) UseMethod("db_desc") #' @name backend_dbplyr #' @export sql_translate_env <- function(con) UseMethod("sql_translate_env") #' @name backend_dbplyr #' @export db_list_tables <- function(con) UseMethod("db_list_tables") #' @name backend_dbplyr #' @export #' @param table A string, the table name. db_has_table <- function(con, table) UseMethod("db_has_table") #' @name backend_dbplyr #' @export #' @param fields A list of fields, as in a data frame. db_data_type <- function(con, fields) UseMethod("db_data_type") #' @export #' @name backend_dbplyr #' @export db_save_query <- function(con, sql, name, temporary = TRUE, ...) { UseMethod("db_save_query") } #' @name backend_dbplyr #' @export db_begin <- function(con, ...) UseMethod("db_begin") #' @name backend_dbplyr #' @export db_commit <- function(con, ...) UseMethod("db_commit") #' @name backend_dbplyr #' @export db_rollback <- function(con, ...) UseMethod("db_rollback") #' @name backend_dbplyr #' @export db_write_table <- function(con, table, types, values, temporary = FALSE, ...) 
{ UseMethod("db_write_table") } #' @name backend_dbplyr #' @export db_create_table <- function(con, table, types, temporary = FALSE, ...) { UseMethod("db_create_table") } #' @name backend_dbplyr #' @export db_insert_into <- function(con, table, values, ...) { UseMethod("db_insert_into") } #' @name backend_dbplyr #' @export db_create_indexes <- function(con, table, indexes = NULL, unique = FALSE, ...) { UseMethod("db_create_indexes") } #' @name backend_dbplyr #' @export db_create_index <- function(con, table, columns, name = NULL, unique = FALSE, ...) { UseMethod("db_create_index") } #' @name backend_dbplyr #' @export db_drop_table <- function(con, table, force = FALSE, ...) { UseMethod("db_drop_table") } #' @name backend_dbplyr #' @export db_analyze <- function(con, table, ...) UseMethod("db_analyze") #' @export #' @rdname backend_dbplyr db_explain <- function(con, sql, ...) { UseMethod("db_explain") } #' @rdname backend_dbplyr #' @export db_query_fields <- function(con, sql, ...) { UseMethod("db_query_fields") } #' @rdname backend_dbplyr #' @export db_query_rows <- function(con, sql, ...) { UseMethod("db_query_rows") } #' @rdname backend_dbplyr #' @export sql_select <- function(con, select, from, where = NULL, group_by = NULL, having = NULL, order_by = NULL, limit = NULL, distinct = FALSE, ...) { UseMethod("sql_select") } #' @export #' @rdname backend_dbplyr sql_subquery <- function(con, from, name = random_table_name(), ...) { UseMethod("sql_subquery") } random_table_name <- function(n = 10) { paste0(sample(letters, n, replace = TRUE), collapse = "") } #' @rdname backend_dbplyr #' @export sql_join <- function(con, x, y, vars, type = "inner", by = NULL, ...) { UseMethod("sql_join") } #' @rdname backend_dbplyr #' @export sql_semi_join <- function(con, x, y, anti = FALSE, by = NULL, ...) 
{ UseMethod("sql_semi_join") } #' @rdname backend_dbplyr #' @export sql_set_op <- function(con, x, y, method) { UseMethod("sql_set_op") } #' @rdname backend_dbplyr #' @export sql_escape_string <- function(con, x) UseMethod("sql_escape_string") #' @rdname backend_dbplyr #' @export sql_escape_ident <- function(con, x) UseMethod("sql_escape_ident") dplyr/R/if-else.R0000644000176200001440000000531714525503021013257 0ustar liggesusers#' Vectorised if-else #' #' `if_else()` is a vectorized [if-else][if]. Compared to the base R equivalent, #' [ifelse()], this function allows you to handle missing values in the #' `condition` with `missing` and always takes `true`, `false`, and `missing` #' into account when determining what the output type should be. #' #' @inheritParams rlang::args_dots_empty #' #' @param condition A logical vector #' #' @param true,false Vectors to use for `TRUE` and `FALSE` values of #' `condition`. #' #' Both `true` and `false` will be [recycled][vctrs::theory-faq-recycling] #' to the size of `condition`. #' #' `true`, `false`, and `missing` (if used) will be cast to their common type. #' #' @param missing If not `NULL`, will be used as the value for `NA` values of #' `condition`. Follows the same size and type rules as `true` and `false`. #' #' @param ptype An optional prototype declaring the desired output type. If #' supplied, this overrides the common type of `true`, `false`, and `missing`. #' #' @param size An optional size declaring the desired output size. If supplied, #' this overrides the size of `condition`. #' #' @return #' A vector with the same size as `condition` and the same type as the common #' type of `true`, `false`, and `missing`. #' #' Where `condition` is `TRUE`, the matching values from `true`, where it is #' `FALSE`, the matching values from `false`, and where it is `NA`, the matching #' values from `missing`, if provided, otherwise a missing value will be used. 
#' #' @export #' @examples #' x <- c(-5:5, NA) #' if_else(x < 0, NA, x) #' #' # Explicitly handle `NA` values in the `condition` with `missing` #' if_else(x < 0, "negative", "positive", missing = "missing") #' #' # Unlike `ifelse()`, `if_else()` preserves types #' x <- factor(sample(letters[1:5], 10, replace = TRUE)) #' ifelse(x %in% c("a", "b", "c"), x, NA) #' if_else(x %in% c("a", "b", "c"), x, NA) #' #' # `if_else()` is often useful for creating new columns inside of `mutate()` #' starwars %>% #' mutate(category = if_else(height < 100, "short", "tall"), .keep = "used") if_else <- function(condition, true, false, missing = NULL, ..., ptype = NULL, size = NULL) { check_dots_empty0(...) # Assert early since we `!` the `condition` check_logical(condition) conditions <- list( condition = condition, !condition ) values <- list( true = true, false = false ) vec_case_when( conditions = conditions, values = values, conditions_arg = "", values_arg = "", default = missing, default_arg = "missing", ptype = ptype, size = size, call = current_env() ) } dplyr/R/utils.R0000644000176200001440000000666014406402754013106 0ustar liggesusers commas <- function(...) paste0(..., collapse = ", ") compact_null <- function(x) { Filter(function(elt) !is.null(elt), x) } paste_line <- function(...) 
{ paste(chr(...), collapse = "\n") } # Until vctrs::new_data_frame() forwards row names automatically dplyr_new_data_frame <- function(x = data.frame(), n = NULL, ..., row.names = NULL, class = NULL) { row.names <- row.names %||% .row_names_info(x, type = 0L) new_data_frame( x, n = n, ..., row.names = row.names, class = class ) } # Strips a list-like vector down to just names dplyr_new_list <- function(x) { if (!is_list(x)) { abort("`x` must be a VECSXP.", .internal = TRUE) } names <- names(x) if (is.null(names)) { attributes(x) <- NULL } else { attributes(x) <- list(names = names) } x } dplyr_new_tibble <- function(x, size) { # ~9x faster than `tibble::new_tibble()` for internal usage new_data_frame(x = x, n = size, class = c("tbl_df", "tbl")) } #' @param x A list #' @param fn An optional function of 1 argument to be applied to each list #' element of `x`. This allows you to further refine what elements should be #' flattened. `fn` should return a single `TRUE` or `FALSE`. #' @param recursive Should `list_flatten()` be applied recursively? If `TRUE`, #' it will continue to apply `list_flatten()` as long as at least one element #' of `x` was flattened in the previous iteration. #' @noRd list_flatten <- function(x, ..., fn = NULL, recursive = FALSE) { check_dots_empty0(...) obj_check_list(x) x <- unclass(x) loc <- map_lgl(x, obj_is_list) if (!is_null(fn)) { loc[loc] <- map_lgl(x[loc], fn) } not_loc <- !loc names <- names(x) if (!is_null(names)) { # Always prefer inner names, even if inner elements are actually unnamed. # This is what `rlang::flatten_if()` did, with a warning. We could also # use `name_spec` and `name_repair` for a more complete solution. 
names[loc] <- "" names(x) <- names } x[loc] <- map(x[loc], unclass) x[not_loc] <- map(x[not_loc], list) out <- list_unchop(x, ptype = list()) if (recursive && any(loc)) { out <- list_flatten(out, fn = fn, recursive = TRUE) } out } maybe_restart <- function(restart) { if (!is_null(findRestart(restart))) { invokeRestart(restart) } } expr_substitute <- function(expr, old, new) { expr <- duplicate(expr) switch(typeof(expr), language = node_walk_replace(node_cdr(expr), old, new), symbol = if (identical(expr, old)) return(new) ) expr } node_walk_replace <- function(node, old, new) { while (!is_null(node)) { switch(typeof(node_car(node)), language = if (!is_call(node_car(node), c("~", "function")) || is_call(node_car(node), "~", n = 2)) node_walk_replace(node_cdar(node), old, new), symbol = if (identical(node_car(node), old)) node_poke_car(node, new) ) node <- node_cdr(node) } } cli_collapse <- function(x, last = " and ") { cli::cli_vec(x, style = list("vec-last" = last)) } with_no_rlang_infix_labeling <- function(expr) { # TODO: Temporary patch for a slowdown seen with `rlang::as_label()` and infix # operators. A real solution likely involves lazy ALTREP vectors (#6681). # https://github.com/r-lib/rlang/commit/33db700d556b0b85a1fe78e14a53f95ac9248004 with_options("rlang:::use_as_label_infix" = FALSE, expr) } dplyr/R/compat-dbplyr.R0000644000176200001440000000356514272553254014527 0ustar liggesusers#' dbplyr compatibility functions #' #' @description #' In dplyr 0.7.0, a number of database and SQL functions moved from dplyr to #' dbplyr. The generic functions stayed in dplyr (since there is no easy way #' to conditionally import a generic from different packages), but many other #' SQL and database helper functions moved. If you have written a backend, #' these functions generate the code you need to work with both dplyr 0.5.0 #' dplyr 0.7.0. 
#' #' @keywords internal #' @export #' @examplesIf requireNamespace("dbplyr", quietly = TRUE) #' wrap_dbplyr_obj("build_sql") #' wrap_dbplyr_obj("base_agg") check_dbplyr <- function() { check_installed("dbplyr", "to communicate with database backends.") } #' @export #' @rdname check_dbplyr wrap_dbplyr_obj <- function(obj_name) { # Silence R CMD check NOTE `UQ<-` <- NULL obj <- getExportedValue("dbplyr", obj_name) obj_sym <- sym(obj_name) dbplyr_sym <- call("::", quote(dbplyr), obj_sym) dplyr_sym <- call("::", quote(dplyr), obj_sym) if (is.function(obj)) { args <- formals() pass_on <- map(set_names(names(args)), sym) dbplyr_call <- expr((!!dbplyr_sym)(!!!pass_on)) dplyr_call <- expr((!!dplyr_sym)(!!!pass_on)) } else { args <- list() dbplyr_call <- dbplyr_sym dplyr_call <- dplyr_sym } body <- expr({ if (utils::packageVersion("dplyr") > "0.5.0") { dplyr::check_dbplyr() !!dbplyr_call } else { !!dplyr_call } }) wrapper <- new_function(args, body, caller_env()) expr(!!obj_sym <- !!get_expr(wrapper)) } utils::globalVariables("!<-") #' @inherit dbplyr::sql #' @export sql <- function(...) { check_dbplyr() dbplyr::sql(...) } #' @inherit dbplyr::ident #' @export #' @examples #' # Identifiers are escaped with " #' @examplesIf requireNamespace("dbplyr", quietly = TRUE) #' ident("x") ident <- function(...) { check_dbplyr() dbplyr::ident(...) } dplyr/R/zzz.R0000644000176200001440000000314714525503021012567 0ustar liggesusers.onLoad <- function(libname, pkgname) { ns_dplyr <- ns_env(pkgname) op <- options() op.dplyr <- list( dplyr.show_progress = TRUE ) toset <- !(names(op.dplyr) %in% names(op)) if (any(toset)) options(op.dplyr[toset]) .Call(dplyr_init_library, ns_dplyr, ns_env("vctrs"), ns_env("rlang")) # TODO: For `arrange()`, `group_by()`, `with_order()`, and `nth()` until vctrs # changes `vec_order()` to the new ordering algorithm, at which point we # should switch from `vec_order_radix()` to `vec_order()` so vctrs can remove # it. 
env_bind( .env = ns_dplyr, vec_order_radix = import_vctrs("vec_order_radix") ) run_on_load() invisible() } .onAttach <- function(libname, pkgname) { setHook(packageEvent("plyr", "attach"), function(...) { packageStartupMessage(rule()) packageStartupMessage( "You have loaded plyr after dplyr - this is likely ", "to cause problems.\nIf you need functions from both plyr and dplyr, ", "please load plyr first, then dplyr:\nlibrary(plyr); library(dplyr)" ) packageStartupMessage(rule()) }) } .onDetach <- function(libpath) { setHook(packageEvent("plyr", "attach"), NULL, "replace") } import_vctrs <- function(name, optional = FALSE) { import_from(name, "vctrs", optional = optional) } import_from <- function(name, package, optional = FALSE) { ns <- getNamespace(package) if (!exists(name, mode = "function", envir = ns, inherits = FALSE)) { if (optional) { return(NULL) } abort(sprintf("No such '%s' function: `%s()`.", package, name)) } get(name, mode = "function", envir = ns, inherits = FALSE) } dplyr/R/vctrs.R0000644000176200001440000000115314266276767013120 0ustar liggesuserscommon_handler <- function(name){ function(cnd) { bullets <- c( glue("`{name}` must return compatible vectors across groups."), i = cnd_bullet_combine_details(cnd$x, cnd$x_arg), i = cnd_bullet_combine_details(cnd$y, cnd$y_arg) ) abort(bullets, class = "dplyr:::error_incompatible_combine") } } dplyr_vec_cast_common <- function(chunks, name) { withCallingHandlers( vec_cast_common(!!!chunks), error = common_handler(name) ) } dplyr_vec_ptype_common <- function(chunks, name) { withCallingHandlers( vec_ptype_common(!!!chunks), error = common_handler(name) ) } dplyr/R/deprec-tibble.R0000644000176200001440000000235714366556340014454 0ustar liggesusers#' Coerce to a tibble #' #' `r lifecycle::badge("deprecated")` #' Please use [tibble::as_tibble()] instead. 
#' #' @export #' @keywords internal #' @param data,x Object to coerce tbl_df <- function(data) { lifecycle::deprecate_warn("1.0.0", "tbl_df()", "tibble::as_tibble()", always = TRUE) # Works in tibble < 1.5.0 too, because .name_repair will be # swallowed by the ellipsis as_tibble(data, .name_repair = "check_unique") } #' @export #' @rdname tbl_df as.tbl <- function(x, ...) { lifecycle::deprecate_warn("1.0.0", "as.tbl()", "tibble::as_tibble()", always = TRUE) UseMethod("as.tbl") } #' @export as.tbl.tbl <- function(x, ...) x #' @export as.tbl.data.frame <- function(x, ...) { as_tibble(x) } #' Convert row names to an explicit variable. #' #' `r lifecycle::badge("deprecated")` #' Please use [tibble::rownames_to_column()] instead. #' #' @param df Input data frame with rownames. #' @param var Name of variable to use #' @keywords internal #' @export add_rownames <- function(df, var = "rowname") { lifecycle::deprecate_warn("1.0.0", "add_rownames()", "tibble::rownames_to_column()", always = TRUE) stopifnot(is.data.frame(df)) rn <- as_tibble(setNames(list(rownames(df)), var)) rownames(df) <- NULL bind_cols(rn, df) } dplyr/R/colwise-funs.R0000644000176200001440000000720514366556340014366 0ustar liggesusersas_fun_list <- function(.funs, .env, ..., .caller, .caller_arg = "...", error_call = caller_env(), .user_env = caller_env(2)) { args <- list2(...) 
force(.user_env) if (is_fun_list(.funs)) { if (!is_empty(args)) { .funs[] <- map(.funs, call_modify, !!!args) } return(.funs) } if (is_list(.funs) && length(.funs) > 1) { .funs <- auto_name_formulas(.funs) } if (!is_character(.funs) && !is_list(.funs)) { .funs <- list(.funs) } if(is_character(.funs) && is_null(names(.funs)) && length(.funs) != 1L) { names(.funs) <- .funs } funs <- map(.funs, function(.x){ if (is_formula(.x)) { if (is_quosure(.x)) { what <- paste0( "dplyr::", .caller, "(", .caller_arg, " = ", "'can\\'t contain quosures')" ) lifecycle::deprecate_warn( "0.8.3", what, details = "Please use a one-sided formula, a function, or a function name.", always = TRUE, env = .env, user_env = .user_env ) .x <- new_formula(NULL, quo_squash(.x), quo_get_env(.x)) } .x <- as_inlined_function(.x, env = .env) } else { if (is_character(.x)) { .x <- get(.x, .env, mode = "function") } else if (!is_function(.x)) { msg <- "`.funs` must be a one sided formula, a function, or a function name." abort(msg, call = error_call) } if (length(args)) { .x <- new_quosure( call2(.x, quote(.), !!!args), env = .env ) } } .x }) attr(funs, "have_name") <- any(names2(funs) != "") funs } auto_name_formulas <- function(funs) { where <- !have_name(funs) & map_lgl(funs, function(x) is_bare_formula(x) && is_call(f_rhs(x))) names(funs)[where] <- map_chr(funs[where], function(x) as_label(f_rhs(x)[[1]])) funs } as_fun <- function(.x, .env, .args, error_call = caller_env()) { quo <- as_quosure(.x, .env) # For legacy reasons, we support strings. Those are enclosed in the # empty environment and need to be switched to the caller environment. 
quo <- quo_set_env(quo, fun_env(quo, .env)) expr <- quo_get_expr(quo) if (is_call(expr, c("function", "~"))) { top_level <- as_string(expr[[1]]) msg <- glue("`{quo_text(expr)}` must be a function name (quoted or unquoted) or an unquoted call, not `{top_level}`.") abort(msg, call = error_call) } if (is_call(expr) && !is_call(expr, c("::", ":::"))) { expr <- call_modify(expr, !!!.args) } else { expr <- call2(expr, quote(.), !!!.args) } set_expr(quo, expr) } quo_as_function <- function(quo) { new_function(exprs(. = ), quo_get_expr(quo), quo_get_env(quo)) } fun_env <- function(quo, default_env) { env <- quo_get_env(quo) if (is_null(env) || identical(env, empty_env())) { default_env } else { env } } is_fun_list <- function(x) { inherits(x, "fun_list") } #' @export `[.fun_list` <- function(x, i) { structure(NextMethod(), class = "fun_list", has_names = attr(x, "has_names") ) } #' @export print.fun_list <- function(x, ..., width = getOption("width")) { cat("\n") names <- format(names(x)) code <- map_chr(x, function(x) deparse_trunc(quo_get_expr(x), width - 2 - nchar(names[1]))) cat(paste0("$ ", names, ": ", code, collapse = "\n")) cat("\n") invisible(x) } deparse_trunc <- function(x, width = getOption("width")) { text <- deparse(x, width.cutoff = width) if (length(text) == 1 && nchar(text) < width) return(text) paste0(substr(text[1], 1, width - 3), "...") } dplyr/R/case-match.R0000644000176200001440000001030114525503021013725 0ustar liggesusers#' A general vectorised `switch()` #' #' @description #' This function allows you to vectorise multiple [switch()] statements. Each #' case is evaluated sequentially and the first match for each element #' determines the corresponding value in the output vector. If no cases match, #' the `.default` is used. #' #' `case_match()` is an R equivalent of the SQL "simple" `CASE WHEN` statement. 
#' #' ## Connection to `case_when()` #' #' While [case_when()] uses logical expressions on the left-hand side of the #' formula, `case_match()` uses values to match against `.x` with. The following #' two statements are roughly equivalent: #' #' ``` #' case_when( #' x %in% c("a", "b") ~ 1, #' x %in% "c" ~ 2, #' x %in% c("d", "e") ~ 3 #' ) #' #' case_match( #' x, #' c("a", "b") ~ 1, #' "c" ~ 2, #' c("d", "e") ~ 3 #' ) #' ``` #' #' @param .x A vector to match against. #' #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A sequence of two-sided #' formulas: `old_values ~ new_value`. The right hand side (RHS) determines #' the output value for all values of `.x` that match the left hand side #' (LHS). #' #' The LHS must evaluate to the same type of vector as `.x`. It can be any #' length, allowing you to map multiple `.x` values to the same RHS value. #' If a value is repeated in the LHS, i.e. a value in `.x` matches to #' multiple cases, the first match is used. #' #' The RHS inputs will be coerced to their common type. Each RHS input will be #' [recycled][vctrs::theory-faq-recycling] to the size of `.x`. #' #' @param .default The value used when values in `.x` aren't matched by any of #' the LHS inputs. If `NULL`, the default, a missing value will be used. #' #' `.default` is [recycled][vctrs::theory-faq-recycling] to the size of #' `.x`. #' #' @param .ptype An optional prototype declaring the desired output type. If #' not supplied, the output type will be taken from the common type of #' all RHS inputs and `.default`. #' #' @return #' A vector with the same size as `.x` and the same type as the common type of #' the RHS inputs and `.default` (if not overridden by `.ptype`). #' #' @seealso [case_when()] #' #' @export #' @examples #' x <- c("a", "b", "a", "d", "b", NA, "c", "e") #' #' # `case_match()` acts like a vectorized `switch()`. #' # Unmatched values "fall through" as a missing value. 
#' case_match( #' x, #' "a" ~ 1, #' "b" ~ 2, #' "c" ~ 3, #' "d" ~ 4 #' ) #' #' # Missing values can be matched exactly, and `.default` can be used to #' # control the value used for unmatched values of `.x` #' case_match( #' x, #' "a" ~ 1, #' "b" ~ 2, #' "c" ~ 3, #' "d" ~ 4, #' NA ~ 0, #' .default = 100 #' ) #' #' # Input values can be grouped into the same expression to map them to the #' # same output value #' case_match( #' x, #' c("a", "b") ~ "low", #' c("c", "d", "e") ~ "high" #' ) #' #' # `case_match()` isn't limited to character input: #' y <- c(1, 2, 1, 3, 1, NA, 2, 4) #' #' case_match( #' y, #' c(1, 3) ~ "odd", #' c(2, 4) ~ "even", #' .default = "missing" #' ) #' #' # Setting `.default` to the original vector is a useful way to replace #' # selected values, leaving everything else as is #' case_match(y, NA ~ 0, .default = y) #' #' starwars %>% #' mutate( #' # Replace missings, but leave everything else alone #' hair_color = case_match(hair_color, NA ~ "unknown", .default = hair_color), #' # Replace some, but not all, of the species #' species = case_match( #' species, #' "Human" ~ "Humanoid", #' "Droid" ~ "Robot", #' c("Wookiee", "Ewok") ~ "Hairy", #' .default = species #' ), #' .keep = "used" #' ) case_match <- function(.x, ..., .default = NULL, .ptype = NULL) { args <- list2(...) args <- case_formula_evaluate( args = args, default_env = caller_env(), dots_env = current_env(), error_call = current_env() ) haystacks <- args$lhs values <- args$rhs vec_case_match( needles = .x, haystacks = haystacks, values = values, needles_arg = ".x", haystacks_arg = "", values_arg = "", default = .default, default_arg = ".default", ptype = .ptype, call = current_env() ) } dplyr/R/rename.R0000644000176200001440000000660314406402754013212 0ustar liggesusers#' Rename columns #' #' `rename()` changes the names of individual variables using #' `new_name = old_name` syntax; `rename_with()` renames columns using a #' function. #' #' @inheritParams arrange #' @param ... 
#' For `rename()`: <[`tidy-select`][dplyr_tidy_select]> Use #' `new_name = old_name` to rename selected variables. #' #' For `rename_with()`: additional arguments passed onto `.fn`. #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Rows are not affected. #' * Column names are changed; column order is preserved. #' * Data frame attributes are preserved. #' * Groups are updated to reflect new names. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}. #' @family single table verbs #' @export #' @examples #' iris <- as_tibble(iris) # so it prints a little nicer #' rename(iris, petal_length = Petal.Length) #' #' # Rename using a named vector and `all_of()` #' lookup <- c(pl = "Petal.Length", sl = "Sepal.Length") #' rename(iris, all_of(lookup)) #' #' # If your named vector might contain names that don't exist in the data, #' # use `any_of()` instead #' lookup <- c(lookup, new = "unknown") #' try(rename(iris, all_of(lookup))) #' rename(iris, any_of(lookup)) #' #' rename_with(iris, toupper) #' rename_with(iris, toupper, starts_with("Petal")) #' rename_with(iris, ~ tolower(gsub(".", "_", .x, fixed = TRUE))) #' #' @examplesIf getRversion() > "4.0.1" #' # If your renaming function uses `paste0()`, make sure to set #' # `recycle0 = TRUE` to ensure that empty selections are recycled correctly #' try(rename_with( #' iris, #' ~ paste0("prefix_", .x), #' starts_with("nonexistent") #' )) #' #' rename_with( #' iris, #' ~ paste0("prefix_", .x, recycle0 = TRUE), #' starts_with("nonexistent") #' ) #' @export rename <- function(.data, ...) 
{ UseMethod("rename") } #' @export rename.data.frame <- function(.data, ...) { loc <- tidyselect::eval_rename(expr(c(...)), .data) # eval_rename() only returns changes names <- names(.data) names[loc] <- names(loc) set_names(.data, names) } #' @export #' @rdname rename #' @param .fn A function used to transform the selected `.cols`. Should #' return a character vector the same length as the input. #' @param .cols <[`tidy-select`][dplyr_tidy_select]> Columns to rename; #' defaults to all columns. rename_with <- function(.data, .fn, .cols = everything(), ...) { UseMethod("rename_with") } #' @export rename_with.data.frame <- function(.data, .fn, .cols = everything(), ...) { .fn <- as_function(.fn) cols <- tidyselect::eval_select(enquo(.cols), .data, allow_rename = FALSE) names <- names(.data) sel <- vec_slice(names, cols) new <- .fn(sel, ...) if (!is_character(new)) { cli::cli_abort( "{.arg .fn} must return a character vector, not {.obj_type_friendly {new}}." ) } if (length(new) != length(sel)) { cli::cli_abort( "{.arg .fn} must return a vector of length {length(sel)}, not {length(new)}." ) } names <- vec_assign(names, cols, new) names <- vec_as_names(names, repair = "check_unique") set_names(.data, names) } dplyr/R/import-standalone-types-check.R0000644000176200001440000002556514406402754017630 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-types-check.R # last-updated: 2023-02-15 # license: https://unlicense.org # dependencies: standalone-obj-type.R # --- # # ## Changelog # # 2023-02-15: # - Added `check_logical()`. # # - `check_bool()`, `check_number_whole()`, and # `check_number_decimal()` are now implemented in C. # # - For efficiency, `check_number_whole()` and # `check_number_decimal()` now take a `NULL` default for `min` and # `max`. 
This makes it possible to bypass unnecessary type-checking # and comparisons in the default case of no bounds checks. # # 2022-10-07: # - `check_number_whole()` and `_decimal()` no longer treat # non-numeric types such as factors or dates as numbers. Numeric # types are detected with `is.numeric()`. # # 2022-10-04: # - Added `check_name()` that forbids the empty string. # `check_string()` allows the empty string by default. # # 2022-09-28: # - Removed `what` arguments. # - Added `allow_na` and `allow_null` arguments. # - Added `allow_decimal` and `allow_infinite` arguments. # - Improved errors with absent arguments. # # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Added changelog. # # nocov start # Scalars ----------------------------------------------------------------- .standalone_types_check_dot_call <- .Call check_bool <- function(x, ..., allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x) && .standalone_types_check_dot_call(ffi_standalone_is_bool_1.0.7, x, allow_na, allow_null)) { return(invisible(NULL)) } stop_input_type( x, c("`TRUE`", "`FALSE`"), ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_string <- function(x, ..., allow_empty = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = allow_empty, allow_na = allow_na, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a single string", ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .rlang_check_is_string <- function(x, allow_empty, allow_na, allow_null) { if (is_string(x)) { if (allow_empty || !is_string(x, "")) { return(TRUE) } } if (allow_null && is_null(x)) { return(TRUE) } if (allow_na && (identical(x, NA) || identical(x, na_chr))) { return(TRUE) 
} FALSE } check_name <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = FALSE, allow_na = FALSE, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a valid name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } IS_NUMBER_true <- 0 IS_NUMBER_false <- 1 IS_NUMBER_oob <- 2 check_number_decimal <- function(x, ..., min = NULL, max = NULL, allow_infinite = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = TRUE, min, max, allow_infinite, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = TRUE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_number_whole <- function(x, ..., min = NULL, max = NULL, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = FALSE, min, max, allow_infinite = FALSE, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = FALSE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .stop_not_number <- function(x, ..., exit_code, allow_decimal, min, max, allow_na, allow_null, arg, call) { if (exit_code == IS_NUMBER_oob) { min <- min %||% -Inf max <- max %||% Inf if (min > -Inf && max < Inf) { what <- sprintf("a number between %s and %s", min, max) } else if (x < min) { what <- sprintf("a number larger than %s", min) } else if (x > max) { what <- sprintf("a number smaller 
than %s", max) } else { abort("Unexpected state in OOB check", .internal = TRUE) } } else if (allow_decimal) { what <- "a number" } else { what <- "a whole number" } stop_input_type( x, what, ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_symbol <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a symbol", ..., allow_null = allow_null, arg = arg, call = call ) } check_arg <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an argument name", ..., allow_null = allow_null, arg = arg, call = call ) } check_call <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_call(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a defused call", ..., allow_null = allow_null, arg = arg, call = call ) } check_environment <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_environment(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an environment", ..., allow_null = allow_null, arg = arg, call = call ) } check_function <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_function(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a function", ..., allow_null = allow_null, arg = arg, call = call ) } check_closure <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_closure(x)) { 
return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an R function", ..., allow_null = allow_null, arg = arg, call = call ) } check_formula <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_formula(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a formula", ..., allow_null = allow_null, arg = arg, call = call ) } # Vectors ----------------------------------------------------------------- check_character <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_character(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a character vector", ..., allow_null = allow_null, arg = arg, call = call ) } check_logical <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_logical(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a logical vector", ..., allow_null = allow_null, arg = arg, call = call ) } # nocov end dplyr/R/reframe.R0000644000176200001440000001005714406402754013362 0ustar liggesusers#' Transform each group to an arbitrary number of rows #' #' @description #' `r lifecycle::badge("experimental")` #' #' While [summarise()] requires that each argument returns a single value, and #' [mutate()] requires that each argument returns the same number of rows as the #' input, `reframe()` is a more general workhorse with no requirements on the #' number of rows returned per group. #' #' `reframe()` creates a new data frame by applying functions to columns of an #' existing data frame. 
It is most similar to `summarise()`, with two big #' differences: #' #' - `reframe()` can return an arbitrary number of rows per group, while #' `summarise()` reduces each group down to a single row. #' #' - `reframe()` always returns an ungrouped data frame, while `summarise()` #' might return a grouped or rowwise data frame, depending on the scenario. #' #' We expect that you'll use `summarise()` much more often than `reframe()`, but #' `reframe()` can be particularly helpful when you need to apply a complex #' function that doesn't return a single summary value. #' #' @inheritParams args_by #' @inheritParams arrange #' #' @param ... <[`data-masking`][rlang::args_data_masking]> #' #' Name-value pairs of functions. The name will be the name of the variable in #' the result. The value can be a vector of any length. #' #' Unnamed data frame values add multiple columns from a single expression. #' #' @return #' If `.data` is a tibble, a tibble. Otherwise, a data.frame. #' #' * The rows originate from the underlying grouping keys. #' * The columns are a combination of the grouping keys and the #' expressions that you provide. #' * The output is always ungrouped. #' * Data frame attributes are **not** preserved, because `reframe()` #' fundamentally creates a new data frame. #' #' @section Connection to tibble: #' `reframe()` is theoretically connected to two functions in tibble, #' [tibble::enframe()] and [tibble::deframe()]: #' #' * `enframe()`: vector -> data frame #' * `deframe()`: data frame -> vector #' * `reframe()`: data frame -> data frame #' #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("reframe")}. 
#' #' @family single table verbs #' @export #' @examples #' table <- c("a", "b", "d", "f") #' #' df <- tibble( #' g = c(1, 1, 1, 2, 2, 2, 2), #' x = c("e", "a", "b", "c", "f", "d", "a") #' ) #' #' # `reframe()` allows you to apply functions that return #' # an arbitrary number of rows #' df %>% #' reframe(x = intersect(x, table)) #' #' # Functions are applied per group, and each group can return a #' # different number of rows. #' df %>% #' reframe(x = intersect(x, table), .by = g) #' #' # The output is always ungrouped, even when using `group_by()` #' df %>% #' group_by(g) %>% #' reframe(x = intersect(x, table)) #' #' # You can add multiple columns at once using a single expression by returning #' # a data frame. #' quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { #' tibble( #' val = quantile(x, probs, na.rm = TRUE), #' quant = probs #' ) #' } #' #' x <- c(10, 15, 18, 12) #' quantile_df(x) #' #' starwars %>% #' reframe(quantile_df(height)) #' #' starwars %>% #' reframe(quantile_df(height), .by = homeworld) #' #' starwars %>% #' reframe( #' across(c(height, mass), quantile_df, .unpack = TRUE), #' .by = homeworld #' ) reframe <- function(.data, ..., .by = NULL) { UseMethod("reframe") } #' @export reframe.data.frame <- function(.data, ..., .by = NULL) { by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data") cols <- summarise_cols(.data, dplyr_quosures(...), by, "reframe") out <- summarise_build(by, cols) if (!is_tibble(.data)) { # The `by` group data we build from is always a tibble, # so we have to manually downcast as needed out <- as.data.frame(out) } out } dplyr/R/select-helpers.R0000644000176200001440000000437714366556340014676 0ustar liggesusers#' Select grouping variables #' #' This selection helpers matches grouping variables. It can be used #' in [select()] or [vars()] selections. #' #' @param data For advanced use only. The default `NULL` automatically #' finds the "current" data frames. 
#' @param vars Deprecated; please use data instead. #' @seealso [groups()] and [group_vars()] for retrieving the grouping #' variables outside selection contexts. #' #' @examples #' gdf <- iris %>% group_by(Species) #' gdf %>% select(group_cols()) #' #' # Remove the grouping variables from mutate selections: #' gdf %>% mutate_at(vars(-group_cols()), `/`, 100) #' # -> No longer necessary with across() #' gdf %>% mutate(across(everything(), ~ . / 100)) #' @export group_cols <- function(vars = NULL, data = NULL) { # So group_cols() continues to work in _at() helpers. data <- data %||% tryCatch(tidyselect::peek_data(), error = function(e) NULL) if (!is.null(data)) { match(group_vars(data), tbl_vars(data)) } else { group_cols_legacy(vars) } } group_cols_legacy <- function(vars = NULL) { if (!is.null(vars)) { lifecycle::deprecate_warn( "1.0.0", "group_cols(vars = )", details = "Use `data` with entire dataframe instead", always = TRUE ) } vars <- vars %||% tidyselect::peek_vars() if (is_sel_vars(vars)) { matches <- match(vars %@% groups, vars) if (anyNA(matches)) { abort("Can't find the grouping variables.") } matches } else { int() } } # Alias required for help links in downstream packages #' @aliases select_helpers #' @importFrom tidyselect contains #' @export tidyselect::contains #' @importFrom tidyselect ends_with #' @export tidyselect::ends_with #' @importFrom tidyselect everything #' @export tidyselect::everything #' @importFrom tidyselect matches #' @export tidyselect::matches #' @importFrom tidyselect num_range #' @export tidyselect::num_range #' @importFrom tidyselect one_of #' @export tidyselect::one_of #' @importFrom tidyselect starts_with #' @export tidyselect::starts_with #' @importFrom tidyselect last_col #' @export tidyselect::last_col #' @importFrom tidyselect any_of #' @export tidyselect::any_of #' @importFrom tidyselect all_of #' @export tidyselect::all_of #' @importFrom tidyselect where #' @export tidyselect::where 
dplyr/R/group-data.R0000644000176200001440000001020614406402754014000 0ustar liggesusers
#' Grouping metadata
#'
#' @description
#' This collection of functions accesses data about grouped data frames in
#' various ways:
#'
#' * `group_data()` returns a data frame that defines the grouping structure.
#'   The columns give the values of the grouping variables. The last column,
#'   always called `.rows`, is a list of integer vectors that gives the
#'   location of the rows in each group.
#'
#' * `group_keys()` returns a data frame describing the groups.
#'
#' * `group_rows()` returns a list of integer vectors giving the rows that
#'   each group contains.
#'
#' * `group_indices()` returns an integer vector the same length as `.data`
#'   that gives the group that each row belongs to.
#'
#' * `group_vars()` gives names of grouping variables as character vector.
#'
#' * `groups()` gives the names of the grouping variables as a list of symbols.
#'
#' * `group_size()` gives the size of each group.
#'
#' * `n_groups()` gives the total number of groups.
#'
#' See [context] for equivalent functions that return values for the _current_
#' group.
#' @param .data,.tbl,x A data frame or extension (like a tibble or grouped
#'   tibble).
#' @param ... Use of `...` is now deprecated; please use `group_by()` first
#'   instead.
#' @keywords internal
#' @examples
#' df <- tibble(x = c(1,1,2,2))
#' group_vars(df)
#' group_rows(df)
#' group_data(df)
#' group_indices(df)
#'
#' gf <- group_by(df, x)
#' group_vars(gf)
#' group_rows(gf)
#' group_data(gf)
#' group_indices(gf)
#' @export
group_data <- function(.data) {
  UseMethod("group_data")
}

#' @export
group_data.data.frame <- function(.data) {
  # An ungrouped data frame is a single group containing every row.
  n <- nrow(.data)
  rows <- new_list_of(list(seq_len(n)), ptype = integer())
  new_data_frame(list(.rows = rows), n = 1L)
}

#' @export
group_data.tbl_df <- function(.data) {
  # Same one-group structure as for data frames, but returned as a tibble.
  dplyr_new_tibble(NextMethod(), size = 1L)
}

#' @export
group_data.rowwise_df <- function(.data) {
  attr(.data, "groups")
}

#' @export
group_data.grouped_df <- function(.data) {
  # Validate the grouping structure up front so that a corrupt grouped_df
  # produces an informative error rather than a downstream failure.
  error_call <- current_env()
  withCallingHandlers(
    validate_grouped_df(.data),
    error = function(cnd) {
      msg <- glue("`.data` must be a valid <grouped_df> object.")
      abort(msg, parent = cnd, call = error_call)
    }
  )
  attr(.data, "groups")
}

# -------------------------------------------------------------------------

#' @rdname group_data
#' @export
group_keys <- function(.tbl, ...) {
  UseMethod("group_keys")
}

#' @export
group_keys.data.frame <- function(.tbl, ...) {
  if (dots_n(...) > 0) {
    lifecycle::deprecate_warn(
      "1.0.0",
      "group_keys(... = )",
      details = "Please `group_by()` first",
      always = TRUE
    )
    .tbl <- group_by(.tbl, ...)
  }
  group_keys0(group_data(.tbl))
}

group_keys0 <- function(x) {
  # Compute keys directly from `group_data()` results
  .Call(`dplyr_group_keys`, x)
}

#' @rdname group_data
#' @export
group_rows <- function(.data) {
  group_data(.data)[[".rows"]]
}

#' @export
#' @rdname group_data
group_indices <- function(.data, ...) {
  if (nargs() == 0) {
    # Zero-argument usage is superseded by cur_group_id() inside verbs.
    lifecycle::deprecate_warn("1.0.0", "group_indices()", "cur_group_id()", always = TRUE)
    return(cur_group_id())
  }
  UseMethod("group_indices")
}

#' @export
group_indices.data.frame <- function(.data, ...) {
  if (dots_n(...) > 0) {
    lifecycle::deprecate_warn(
      "1.0.0",
      "group_indices(... = )",
      details = "Please `group_by()` first",
      always = TRUE
    )
    .data <- group_by(.data, ...)
  }
  .Call(`dplyr_group_indices`, .data, group_rows(.data))
}

#' @export
#' @rdname group_data
group_vars <- function(x) {
  UseMethod("group_vars")
}

#' @export
group_vars.data.frame <- function(x) {
  # Every column of group_data() except `.rows` is a grouping variable.
  setdiff(names(group_data(x)), ".rows")
}

#' @export
#' @rdname group_data
groups <- function(x) {
  UseMethod("groups")
}

#' @export
groups.data.frame <- function(x) {
  syms(group_vars(x))
}

#' @export
#' @rdname group_data
group_size <- function(x) UseMethod("group_size")

#' @export
group_size.data.frame <- function(x) {
  lengths(group_rows(x))
}

#' @export
#' @rdname group_data
n_groups <- function(x) UseMethod("n_groups")

#' @export
n_groups.data.frame <- function(x) {
  nrow(group_data(x))
}
dplyr/R/sets.R0000644000176200001440000001357614472225345012722 0ustar liggesusers
#' Set operations
#'
#' @description
#' Perform set operations using the rows of a data frame.
#'
#' * `intersect(x, y)` finds all rows in both `x` and `y`.
#' * `union(x, y)` finds all rows in either `x` or `y`, excluding duplicates.
#' * `union_all(x, y)` finds all rows in either `x` or `y`, including duplicates.
#' * `setdiff(x, y)` finds all rows in `x` that aren't in `y`.
#' * `symdiff(x, y)` computes the symmetric difference, i.e. all rows in
#'   `x` that aren't in `y` and all rows in `y` that aren't in `x`.
#' * `setequal(x, y)` returns `TRUE` if `x` and `y` contain the same rows
#'   (ignoring order).
#'
#' Note that `intersect()`, `union()`, `setdiff()`, and `symdiff()` remove
#' duplicates in `x` and `y`.
#'
#' # Base functions
#' `intersect()`, `union()`, `setdiff()`, and `setequal()` override the base
#' functions of the same name in order to make them generic. The existing
#' behaviour for vectors is preserved by providing default methods that call
#' the base functions.
#'
#' @param x,y Pair of compatible data frames.
#'   A pair of data frames is
#'   compatible if they have the same column names (possibly in different
#'   orders) and compatible types.
#' @inheritParams rlang::args_dots_empty
#' @name setops
#' @examples
#' df1 <- tibble(x = 1:3)
#' df2 <- tibble(x = 3:5)
#'
#' intersect(df1, df2)
#' union(df1, df2)
#' union_all(df1, df2)
#' setdiff(df1, df2)
#' setdiff(df2, df1)
#' symdiff(df1, df2)
#'
#' setequal(df1, df2)
#' setequal(df1, df1[3:1, ])
#'
#' # Note that the following functions remove pre-existing duplicates:
#' df1 <- tibble(x = c(1:3, 3, 3))
#' df2 <- tibble(x = c(3:5, 5))
#'
#' intersect(df1, df2)
#' union(df1, df2)
#' setdiff(df1, df2)
#' symdiff(df1, df2)
NULL

#' @name setops
#' @aliases intersect
#' @usage intersect(x, y, ...)
#' @importFrom generics intersect
#' @export intersect
NULL

#' @name setops
#' @aliases union
#' @usage union(x, y, ...)
#' @importFrom generics union
#' @export union
NULL

#' @rdname setops
#' @export
union_all <- function(x, y, ...) UseMethod("union_all")

#' @export
union_all.default <- function(x, y, ...) {
  check_dots_empty()
  vec_c(x, y)
}

#' @name setops
#' @aliases setdiff
#' @usage setdiff(x, y, ...)
#' @importFrom generics setdiff
#' @export setdiff
NULL

#' @name setops
#' @aliases setequal
#' @usage setequal(x, y, ...)
#' @importFrom generics setequal
#' @export setequal
NULL

#' @rdname setops
#' @export
symdiff <- function(x, y, ...) {
  UseMethod("symdiff")
}

#' @export
symdiff.default <- function(x, y, ...) {
  check_dots_empty()
  # Default is defined in terms of base R methods
  setdiff(union(x, y), intersect(x, y))
}

#' @export
intersect.data.frame <- function(x, y, ...) {
  check_dots_empty()
  check_compatible(x, y)
  out <- vec_set_intersect(x, y, error_call = current_env())
  dplyr_reconstruct(out, x)
}

#' @export
union.data.frame <- function(x, y, ...) {
  check_dots_empty()
  check_compatible(x, y)
  out <- vec_set_union(x, y, error_call = current_env())
  dplyr_reconstruct(out, x)
}

#' @export
union_all.data.frame <- function(x, y, ...) {
  check_dots_empty()
  check_compatible(x, y)
  out <- vec_rbind(x, y)
  dplyr_reconstruct(out, x)
}

#' @export
setdiff.data.frame <- function(x, y, ...) {
  check_dots_empty()
  check_compatible(x, y)
  out <- vec_set_difference(x, y, error_call = current_env())
  dplyr_reconstruct(out, x)
}

#' @export
setequal.data.frame <- function(x, y, ...) {
  check_dots_empty()
  check_compatible(x, y)
  cast <- vec_cast_common(x = x, y = y)
  # Same rows iff each side's rows all appear in the other.
  all(vec_in(cast$x, cast$y)) && all(vec_in(cast$y, cast$x))
}

#' @export
symdiff.data.frame <- function(x, y, ...) {
  check_dots_empty()
  check_compatible(x, y)
  out <- vec_set_symmetric_difference(x, y, error_call = current_env())
  dplyr_reconstruct(out, x)
}

# Helpers -----------------------------------------------------------------

# Returns TRUE when `x` and `y` can take part in a set operation, otherwise
# a character vector of bullet messages explaining why they cannot.
is_compatible <- function(x, y, ignore_col_order = TRUE, convert = TRUE) {
  if (!is.data.frame(y)) {
    return("`y` must be a data frame.")
  }

  nc <- ncol(x)
  if (nc != ncol(y)) {
    return(
      c(x = glue("Different number of columns: {nc} vs {ncol(y)}."))
    )
  }

  names_x <- names(x)
  names_y <- names(y)

  names_y_not_in_x <- setdiff(names_y, names_x)
  names_x_not_in_y <- setdiff(names_x, names_y)

  if (length(names_y_not_in_x) == 0L && length(names_x_not_in_y) == 0L) {
    # check if same order
    if (!isTRUE(ignore_col_order)) {
      if (!identical(names_x, names_y)) {
        return(c(x = "Same column names, but different order."))
      }
    }
  } else {
    # names are not the same, explain why
    msg <- c()
    if (length(names_y_not_in_x)) {
      wrong <- glue_collapse(glue('`{names_y_not_in_x}`'), sep = ", ")
      msg <- c(
        msg,
        x = glue("Cols in `y` but not `x`: {wrong}.")
      )
    }
    if (length(names_x_not_in_y)) {
      wrong <- glue_collapse(glue('`{names_x_not_in_y}`'), sep = ", ")
      msg <- c(
        msg,
        x = glue("Cols in `x` but not `y`: {wrong}.")
      )
    }
    return(msg)
  }

  msg <- c()
  for (name in names_x) {
    x_i <- x[[name]]
    y_i <- y[[name]]

    if (convert) {
      # Value-returning tryCatch instead of `<<-` from inside the handler:
      # the handler returns the problem message (or NULL on success), which
      # we then append locally.
      problem <- tryCatch(
        {
          vec_ptype2(x_i, y_i)
          NULL
        },
        error = function(e) {
          glue("Incompatible types for column `{name}`: {vec_ptype_full(x_i)} vs {vec_ptype_full(y_i)}.")
        }
      )
      if (!is.null(problem)) {
        msg <- c(msg, x = problem)
      }
    } else {
      if (!identical(vec_ptype(x_i), vec_ptype(y_i))) {
        msg <- c(
          msg,
          x = glue("Different types for column `{name}`: {vec_ptype_full(x_i)} vs {vec_ptype_full(y_i)}.")
        )
      }
    }
  }
  if (length(msg)) {
    return(msg)
  }

  TRUE
}

# Aborts with the bullets from `is_compatible()` unless the pair is
# compatible; errors are reported from `error_call`'s frame.
check_compatible <- function(x,
                             y,
                             ignore_col_order = TRUE,
                             convert = TRUE,
                             error_call = caller_env()) {
  compat <- is_compatible(x, y, ignore_col_order = ignore_col_order, convert = convert)
  if (isTRUE(compat)) {
    return()
  }

  abort(c("`x` and `y` are not compatible.", compat), call = error_call)
}
dplyr/R/group-trim.R0000644000176200001440000000237514366556340014050 0ustar liggesusers
#' Trim grouping structure
#'
#' @description
#' `r lifecycle::badge("experimental")`
#' Drop unused levels of all factors that are used as grouping variables,
#' then recalculates the grouping structure.
#'
#' `group_trim()` is particularly useful after a [filter()] that is intended
#' to select a subset of groups.
#'
#' @param .tbl A [grouped data frame][grouped_df()]
#' @param .drop See [group_by()]
#' @return A [grouped data frame][grouped_df()]
#' @export
#' @family grouping functions
#' @examples
#' iris %>%
#'   group_by(Species) %>%
#'   filter(Species == "setosa", .preserve = TRUE) %>%
#'   group_trim()
group_trim <- function(.tbl, .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("experimental", "group_trim()")
  UseMethod("group_trim")
}

#' @export
group_trim.data.frame <- function(.tbl, .drop = group_by_drop_default(.tbl)) {
  # Nothing to trim on an ungrouped data frame.
  .tbl
}

#' @export
group_trim.grouped_df <- function(.tbl, .drop = group_by_drop_default(.tbl)) {
  vars <- group_vars(.tbl)
  ungrouped <- ungroup(.tbl)

  # names of the factors that should be droplevels()'d
  fgroups <- names(select_if(select_at(ungrouped, vars), is.factor))

  # drop levels
  dropped <- mutate_at(ungrouped, fgroups, droplevels)

  # regroup
  group_by_at(dropped, vars, .drop = .drop)
}
dplyr/R/generics.R0000644000176200001440000002273414525503021013534 0ustar liggesusers
#' Extending dplyr with new data
#' frame subclasses
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' These three functions, along with `names<-` and 1d numeric `[`
#' (i.e. `x[loc]`) methods, provide a minimal interface for extending dplyr
#' to work with new data frame subclasses. This means that for simple cases
#' you should only need to provide a couple of methods, rather than a method
#' for every dplyr verb.
#'
#' These functions are a stop-gap measure until we figure out how to solve
#' the problem more generally, but it's likely that any code you write to
#' implement them will find a home in what comes next.
#'
#' # Basic advice
#'
#' This section gives you basic advice if you want to extend dplyr to work with
#' your custom data frame subclass, and you want the dplyr methods to behave
#' in basically the same way.
#'
#' * If you have data frame attributes that don't depend on the rows or columns
#'   (and should unconditionally be preserved), you don't need to do anything.
#'   The one exception to this is if your subclass extends a data.frame
#'   directly rather than extending a tibble. The `[.data.frame` method does not
#'   preserve attributes, so you'll need to write a `[` method for your subclass
#'   that preserves attributes important for your class.
#'
#' * If you have __scalar__ attributes that depend on __rows__, implement a
#'   `dplyr_reconstruct()` method. Your method should recompute the attribute
#'   depending on rows now present.
#'
#' * If you have __scalar__ attributes that depend on __columns__, implement a
#'   `dplyr_reconstruct()` method and a 1d `[` method. For example, if your
#'   class requires that certain columns be present, your method should return
#'   a data.frame or tibble when those columns are removed.
#'
#' * If your attributes are __vectorised__ over __rows__, implement a
#'   `dplyr_row_slice()` method. This gives you access to `i` so you can
#'   modify the row attribute accordingly. You'll also need to think carefully
#'   about how to recompute the attribute in `dplyr_reconstruct()`, and
#'   you will need to carefully verify the behaviour of each verb, and provide
#'   additional methods as needed.
#'
#' * If your attributes that are __vectorised__ over __columns__, implement
#'   `dplyr_col_modify()`, 1d `[`, and `names<-` methods. All of these methods
#'   know which columns are being modified, so you can update the column
#'   attribute according. You'll also need to think carefully about how to
#'   recompute the attribute in `dplyr_reconstruct()`, and you will need to
#'   carefully verify the behaviour of each verb, and provide additional
#'   methods as needed.
#'
#' # Current usage
#'
#' * `arrange()`, `filter()`, `slice()` (and the rest of the `slice_*()`
#'   family), `semi_join()`, and `anti_join()` work by generating a vector of
#'   row indices, and then subsetting with `dplyr_row_slice()`.
#'
#' * `mutate()` generates a list of new column value (using `NULL` to indicate
#'   when columns should be deleted), then passes that to `dplyr_col_modify()`.
#'   It also uses 1d `[` to implement `.keep`, and will call `relocate()` if
#'   either `.before` or `.after` are supplied.
#'
#' * `summarise()` and `reframe()` work similarly to `mutate()` but the data
#'   modified by `dplyr_col_modify()` comes from `group_data()` or is built
#'   from `.by`.
#'
#' * `select()` uses 1d `[` to select columns, then `names<-` to rename them.
#'   `rename()` just uses `names<-`. `relocate()` just uses 1d `[`.
#'
#' * `inner_join()`, `left_join()`, `right_join()`, and `full_join()`
#'   coerce `x` to a tibble, modify the rows, then use `dplyr_reconstruct()`
#'   to convert back to the same type as `x`.
#'
#' * `nest_join()` converts both `x` and `y` to tibbles, modifies the rows,
#'   and uses `dplyr_col_modify()` to handle modified key variables and the
#'   list-column that `y` becomes. It also uses `dplyr_reconstruct()` to convert
#'   the outer result back to the type of `x`, and to convert the nested tibbles
#'   back to the type of `y`.
#'
#' * `distinct()` does a `mutate()` if any expressions are present, then
#'   uses 1d `[` to select variables to keep, then `dplyr_row_slice()` to
#'   select distinct rows.
#'
#' Note that `group_by()` and `ungroup()` don't use any of these generics and
#' you'll need to provide methods for them directly, or rely on `.by` for
#' per-operation grouping.
#'
#' @keywords internal
#' @param data A tibble. We use tibbles because they avoid some inconsistent
#'   subset-assignment use cases.
#' @name dplyr_extending
NULL

#' @export
#' @rdname dplyr_extending
#' @param i A numeric or logical vector that indexes the rows of `data`.
dplyr_row_slice <- function(data, i, ...) {
  if (!is.numeric(i) && !is.logical(i)) {
    abort("`i` must be a numeric or logical vector.")
  }
  UseMethod("dplyr_row_slice")
}

#' @export
dplyr_row_slice.data.frame <- function(data, i, ...) {
  dplyr_reconstruct(vec_slice(data, i), data)
}

#' @export
dplyr_row_slice.grouped_df <- function(data, i, ..., preserve = FALSE) {
  out <- vec_slice(as.data.frame(data), i)

  # Index into group_indices, then use that to restore the grouping structure
  groups <- group_data(data)
  new_id <- vec_slice(group_indices(data), i)
  new_grps <- vec_group_loc(new_id)

  rows <- rep(list_of(integer()), length.out = nrow(groups))
  rows[new_grps$key] <- new_grps$loc
  groups$.rows <- rows

  # Optionally drop empty groups unless the caller asked to preserve them.
  if (!preserve && isTRUE(attr(groups, ".drop"))) {
    groups <- group_data_trim(groups)
  }

  new_grouped_df(out, groups)
}

#' @export
dplyr_row_slice.rowwise_df <- function(data, i, ..., preserve = FALSE) {
  out <- vec_slice(data, i)
  group_data <- vec_slice(group_keys(data), i)
  new_rowwise_df(out, group_data)
}

#' @export
#' @rdname dplyr_extending
#' @param cols A named list used to modify columns. A `NULL` value should remove
#'   an existing column.
dplyr_col_modify <- function(data, cols) {
  UseMethod("dplyr_col_modify")
}

#' @export
dplyr_col_modify.data.frame <- function(data, cols) {
  # Must be implemented from first principles to avoiding edge cases in
  # [.data.frame and [.tibble (2.1.3 and earlier).

  # Apply tidyverse recycling rules
  cols <- vec_recycle_common(!!!cols, .size = nrow(data))

  # Transform to list to avoid stripping inner names with `[[<-`
  out <- as.list(vec_data(data))

  nms <- as_utf8_character(names2(cols))
  names(out) <- as_utf8_character(names2(out))

  for (i in seq_along(cols)) {
    nm <- nms[[i]]
    out[[nm]] <- cols[[i]]
  }

  # Transform back to data frame before reconstruction
  row_names <- .row_names_info(data, type = 0L)
  out <- new_data_frame(out, n = nrow(data), row.names = row_names)

  dplyr_reconstruct(out, data)
}

#' @export
dplyr_col_modify.grouped_df <- function(data, cols) {
  out <- dplyr_col_modify(as_tibble(data), cols)

  if (any(names(cols) %in% group_vars(data))) {
    # regroup
    grouped_df(out, group_vars(data), drop = group_by_drop_default(data))
  } else {
    new_grouped_df(out, group_data(data))
  }
}

#' @export
dplyr_col_modify.rowwise_df <- function(data, cols) {
  out <- dplyr_col_modify(as_tibble(data), cols)
  rowwise_df(out, group_vars(data))
}

#' @param template Template data frame to use for restoring attributes.
#' @export
#' @rdname dplyr_extending
dplyr_reconstruct <- function(data, template) {
  # Strip attributes before dispatch to make it easier to implement
  # methods and prevent unexpected leaking of irrelevant attributes.
  # This also enforces that `data` is a well-formed data frame.
  data <- dplyr_new_data_frame(data)
  return(dplyr_reconstruct_dispatch(data, template))
  # NOTE(review): unreachable by design — real dispatch happens on `template`
  # through `dplyr_reconstruct_dispatch()` above; this call presumably keeps
  # the function recognisable as an S3 generic to tooling. Confirm before
  # removing.
  UseMethod("dplyr_reconstruct", template)
}

# Helper so that `dplyr_reconstruct()` dispatches on `template`, not `data`.
dplyr_reconstruct_dispatch <- function(data, template) {
  UseMethod("dplyr_reconstruct", template)
}

#' @export
dplyr_reconstruct.data.frame <- function(data, template) {
  .Call(ffi_dplyr_reconstruct, data, template)
}

#' @export
dplyr_reconstruct.grouped_df <- function(data, template) {
  group_vars <- group_intersect(template, data)
  grouped_df(data, group_vars, drop = group_by_drop_default(template))
}

#' @export
dplyr_reconstruct.rowwise_df <- function(data, template) {
  group_vars <- group_intersect(template, data)
  rowwise_df(data, group_vars)
}

# Select columns by location, verifying that the subclass's `[` method
# returned a well-formed data frame with exactly the requested columns.
dplyr_col_select <- function(.data, loc, error_call = caller_env()) {
  loc <- vec_as_location(loc, n = ncol(.data), names = names(.data))

  out <- .data[loc]

  if (!inherits(out, "data.frame")) {
    classes_data <- glue_collapse(class(.data), sep = "/")
    classes_out <- glue_collapse(class(out), sep = "/")
    bullets <- c(
      "Can't reconstruct data frame.",
      x = glue("The `[` method for class <{classes_data}> must return a data frame."),
      i = glue("It returned a <{classes_out}>.")
    )
    abort(bullets, call = error_call)
  }

  if (length(out) != length(loc)) {
    classes_data <- glue_collapse(class(.data), sep = "/")
    classes_out <- glue_collapse(class(out), sep = "/")
    s <- function(x) if (length(x) == 1) "" else "s"
    bullets <- c(
      "Can't reconstruct data frame.",
      x = glue("The `[` method for class <{classes_data}> must return a data frame with {length(loc)} column{s(loc)}."),
      i = glue("It returned a <{classes_out}> of {length(out)} column{s(out)}.")
    )
    abort(bullets, call = error_call)
  }

  # Patch base data frames and data.table (#6171) to restore extra attributes that `[.data.frame` drops.
  # We require `[` methods to keep extra attributes for all data frame subclasses.
  if (identical(class(.data), "data.frame") || identical(class(.data), c("data.table", "data.frame"))) {
    out <- dplyr_reconstruct(out, .data)
  }

  out
}
dplyr/R/doc-params.R0000644000176200001440000000225714472225345013774 0ustar liggesusers
#' Argument type: tidy-select
#'
#' @description
#' This page describes the `<tidy-select>` argument modifier which indicates
#' the argument supports **tidy selections**. Tidy selection provides a concise
#' dialect of R for selecting variables based on their names or properties.
#'
#' Tidy selection is a variant of tidy evaluation. This means that inside
#' functions, tidy-select arguments require special attention, as described in
#' the *Indirection* section below. If you've never heard of tidy evaluation
#' before, start with `vignette("programming")`.
#'
#'
#' # Overview of selection features
#'
#' ```{r, child = "man/rmd/overview.Rmd"}
#' ```
#'
#'
#' # Indirection
#'
#' There are two main cases:
#'
#' * If you have a character vector of column names, use `all_of()`
#'   or `any_of()`, depending on whether or not you want unknown variable
#'   names to cause an error, e.g. `select(df, all_of(vars))`,
#'   `select(df, !any_of(vars))`.
#'
#' * If you want the user to be able to supply a tidyselect specification in
#'   a function argument, embrace the function argument, e.g.
#'   `select(df, {{ vars }})`.
#'
#' @keywords internal
#' @name dplyr_tidy_select
NULL
dplyr/R/error.R0000644000176200001440000000124214366556340013074 0ustar liggesusers
# ngettext() does extra work, this function is a simpler version
ntext <- function(n, msg1, msg2) {
  if (n == 1) msg1 else msg2
}

# "Argument x" / "Arguments x, y" depending on how many there are.
fmt_pos_args <- function(x) {
  args <- ntext(length(x), "Argument", "Arguments")
  glue("{args} {fmt_comma(x)}")
}

# "Column `x`" / "Columns `x`, `y`" depending on how many there are.
fmt_cols <- function(x) {
  cols <- ntext(length(x), "Column", "Columns")
  glue("{cols} {fmt_obj(x)}")
}

# Comma-separated list of backtick-quoted names.
fmt_obj <- function(x) {
  fmt_comma(fmt_obj1(x))
}

# Wrap each element in backticks.
fmt_obj1 <- function(x) {
  paste0("`", x, "`")
}

# Collapse a class vector into a single "a/b/c" string.
fmt_classes <- function(x) {
  paste(class(x), collapse = "/")
}

# Comma-separate, truncating to `.max` items with a trailing "...".
fmt_comma <- function(..., .max = 6) {
  parts <- paste0(...)
  if (length(parts) > .max) {
    length(parts) <- .max
    parts[[.max]] <- "..."
  }
  commas(parts)
}
dplyr/R/colwise-group-by.R0000644000176200001440000000646214406402754015155 0ustar liggesusers
#' Group by a selection of variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [pick()] or [across()] in an existing verb. See `vignette("colwise")` for
#' details.
#'
#' These [scoped] variants of [group_by()] group a data frame by a
#' selection of variables. Like [group_by()], they have optional
#' [mutate] semantics.
#'
#' @inheritParams scoped
#' @inheritParams group_by
#' @param .add See [group_by()]
#'
#' @export
#'
#' @section Grouping variables:
#'
#' Existing grouping variables are maintained, even if not included in
#' the selection.
#'
#' @keywords internal
#' @examples
#' # Group a data frame by all variables:
#' group_by_all(mtcars)
#' # ->
#' mtcars %>% group_by(pick(everything()))
#'
#' # Group by variables selected with a predicate:
#' group_by_if(iris, is.factor)
#' # ->
#' iris %>% group_by(pick(where(is.factor)))
#'
#' # Group by variables selected by name:
#' group_by_at(mtcars, vars(vs, am))
#' # ->
#' mtcars %>% group_by(pick(vs, am))
#'
#' # Like group_by(), the scoped variants have optional mutate
#' # semantics. This provide a shortcut for group_by() + mutate():
#' d <- tibble(x=c(1,1,2,2), y=c(1,2,1,2))
#' group_by_all(d, as.factor)
#' # ->
#' d %>% group_by(across(everything(), as.factor))
#'
#' group_by_if(iris, is.factor, as.character)
#' # ->
#' iris %>% group_by(across(where(is.factor), as.character))
group_by_all <- function(.tbl,
                         .funs = list(),
                         ...,
                         .add = FALSE,
                         .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("superseded", "group_by_all()")
  funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), ..., .caller = "group_by_all")
  if (!length(funs)) {
    # No transformation supplied: group by every column as-is.
    funs <- syms(tbl_vars(.tbl))
  }
  .group_by_static_drop(.tbl, !!!funs, .add = .add, .drop = .drop)
}

#' @rdname group_by_all
#' @export
group_by_at <- function(.tbl,
                        .vars,
                        .funs = list(),
                        ...,
                        .add = FALSE,
                        .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("superseded", "group_by_at()")
  funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "group_by_at")
  if (!length(funs)) {
    funs <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE)
  }
  .group_by_static_drop(.tbl, !!!funs, .add = .add, .drop = .drop)
}

#' @rdname group_by_all
#' @export
group_by_if <- function(.tbl,
                        .predicate,
                        .funs = list(),
                        ...,
                        .add = FALSE,
                        .drop = group_by_drop_default(.tbl)) {
  lifecycle::signal_stage("superseded", "group_by_if()")
  funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "group_by_if")
  if (!length(funs)) {
    funs <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE)
  }
  .group_by_static_drop(.tbl, !!!funs, .add = .add, .drop = .drop)
}

# workaround so that methods that do not have the .drop argument yet
# don't create the auto mutate .drop column
#
# things like count() and group_by_all()
# can call .group_by_static_drop() instead of group_by()
# so that .drop is only part of the group_by() call if it is FALSE
#
# this is only meant to stay in dplyr until 0.8.0 to give
# implementers of group_by() methods a chance to add .drop in their
# arguments
.group_by_static_drop <- function(..., .drop) {
  if (.drop) {
    group_by(...)
  } else {
    group_by(..., .drop = FALSE)
  }
}
dplyr/R/explain.R0000644000176200001440000000265014366556340013407 0ustar liggesusers
#' Explain details of a tbl
#'
#' This is a generic function which gives more details about an object than
#' [print()], and is more focused on human readable output than
#' [str()].
#'
#' @section Databases:
#' Explaining a `tbl_sql` will run the SQL `EXPLAIN` command which
#' will describe the query plan. This requires a little bit of knowledge about
#' how `EXPLAIN` works for your database, but is very useful for
#' diagnosing performance problems.
#'
#' @export
#' @param x An object to explain
#' @param ... Other parameters possibly used by generic
#' @return The first argument, invisibly.
#' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)
#' \donttest{
#' lahman_s <- dbplyr::lahman_sqlite()
#' batting <- tbl(lahman_s, "Batting")
#' batting %>% show_query()
#' batting %>% explain()
#'
#' # The batting database has indices on all ID variables:
#' # SQLite automatically picks the most restrictive index
#' batting %>% filter(lgID == "NL" & yearID == 2000L) %>% explain()
#'
#' # OR's will use multiple indexes
#' batting %>% filter(lgID == "NL" | yearID == 2000) %>% explain()
#'
#' # Joins will use indexes in both tables
#' teams <- tbl(lahman_s, "Teams")
#' batting %>% left_join(teams, c("yearID", "teamID")) %>% explain()
#' }
explain <- function(x, ...) {
  UseMethod("explain")
}

#' @export
#' @rdname explain
show_query <- function(x, ...) {
  UseMethod("show_query")
}
dplyr/R/groups-with.R0000644000176200001440000000245014366556340014235 0ustar liggesusers
#' Perform an operation with temporary groups
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' This was an experimental function that allows you to modify the grouping
#' variables for a single operation; it is superseded in favour of using the
#' `.by` argument to individual verbs.
#'
#' @param .data A data frame
#' @param .groups <[`tidy-select`][dplyr_tidy_select]> One or more variables
#'   to group by. Unlike [group_by()], you can only group by existing variables,
#'   and you can use tidy-select syntax like `c(x, y, z)` to select multiple
#'   variables.
#'
#'   Use `NULL` to temporarily **un**group.
#' @param .f Function to apply to regrouped data.
#'   Supports purrr-style `~` syntax
#' @param ... Additional arguments passed on to `...`.
#' @keywords internal
#' @export
#' @examples
#' df <- tibble(g = c(1, 1, 2, 2, 3), x = runif(5))
#'
#' # Old
#' df %>%
#'   with_groups(g, mutate, x_mean = mean(x))
#' # New
#' df %>% mutate(x_mean = mean(x), .by = g)
with_groups <- function(.data, .groups, .f, ...) {
  lifecycle::signal_stage("experimental", "with_groups()")
  # Resolve the tidy-selection against a zero-row prototype of `.data`.
  loc <- tidyselect::eval_select(enquo(.groups), data = tbl_ptype(.data))
  val <- syms(names(.data)[loc])

  out <- group_by(.data, !!!val)

  .f <- as_function(.f)
  out <- .f(out, ...)

  # Restore the original class/grouping of the input.
  dplyr_reconstruct(out, .data)
}
dplyr/R/deprec-lazyeval.R0000644000176200001440000003074514472225345015040 0ustar liggesusers
#' Deprecated SE versions of main verbs.
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' dplyr used to offer twin versions of each verb suffixed with an
#' underscore. These versions had standard evaluation (SE) semantics:
#' rather than taking arguments by code, like NSE verbs, they took
#' arguments by value. Their purpose was to make it possible to
#' program with dplyr. However, dplyr now uses tidy evaluation
#' semantics. NSE verbs still capture their arguments, but you can now
#' unquote parts of these arguments. This offers full programmability
#' with NSE verbs. Thus, the underscored versions are now superfluous.
#'
#' Unquoting triggers immediate evaluation of its operand and inlines
#' the result within the captured expression. This result can be a
#' value or an expression to be evaluated later with the rest of the
#' argument. See `vignette("programming")` for more information.
#'
#' @name se-deprecated
#' @param .data A data frame.
#' @param dots,.dots,... Pair/values of expressions coercible to lazy objects.
#' @param vars Various meanings depending on the verb.
#' @param args Various meanings depending on the verb.
#' @keywords internal
NULL

# Shared deprecation warning for all of the underscored SE verbs.
lazy_deprec <- function(fun,
                        hint = TRUE,
                        env = caller_env(),
                        user_env = caller_env(2)) {
  lifecycle::deprecate_warn(
    when = "0.7.0",
    what = paste0(fun, "_()"),
    with = paste0(fun, "()"),
    details = if (hint) "See vignette('programming') for more help",
    env = env,
    user_env = user_env,
    always = TRUE
  )
}

#' @rdname se-deprecated
#' @export
add_count_ <- function(x, vars, wt = NULL, sort = FALSE) {
  lazy_deprec("add_count")

  vars <- compat_lazy_dots(vars, caller_env())
  wt <- wt %||% quo(NULL)
  wt <- compat_lazy(wt, caller_env())
  add_count(x, !!!vars, wt = !!wt, sort = sort)
}

#' @rdname se-deprecated
#' @export
add_tally_ <- function(x, wt, sort = FALSE) {
  lazy_deprec("add_tally")

  wt <- compat_lazy(wt, caller_env())
  add_tally(x, !!wt, sort = sort)
}

#' @export
#' @rdname se-deprecated
arrange_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("arrange")
  UseMethod("arrange_")
}

#' @export
arrange_.data.frame <- function(.data, ..., .dots = list(), .by_group = FALSE) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  arrange(.data, !!!dots, .by_group = .by_group)
}

#' @export
arrange_.tbl_df <- function(.data, ..., .dots = list(), .by_group = FALSE) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  arrange(.data, !!!dots, .by_group = .by_group)
}

#' @export
#' @rdname se-deprecated
count_ <- function(x,
                   vars,
                   wt = NULL,
                   sort = FALSE,
                   .drop = group_by_drop_default(x)) {
  lazy_deprec("count")

  vars <- compat_lazy_dots(vars, caller_env())
  wt <- wt %||% quo(NULL)
  wt <- compat_lazy(wt, caller_env())
  count(x, !!!vars, wt = !!wt, sort = sort, .drop = .drop)
}

#' @export
#' @rdname se-deprecated
#' @inheritParams distinct
distinct_ <- function(.data, ..., .dots, .keep_all = FALSE) {
  lazy_deprec("distinct")
  UseMethod("distinct_")
}

#' @export
distinct_.data.frame <- function(.data, ..., .dots = list(), .keep_all = FALSE) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  distinct(.data, !!!dots, .keep_all = .keep_all)
}

#' @export
# Can't use NextMethod() in R 3.1, r-lib/rlang#486
distinct_.tbl_df <- distinct_.data.frame

#' @export
distinct_.grouped_df <- function(.data, ..., .dots = list(), .keep_all = FALSE) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  distinct(.data, !!!dots, .keep_all = .keep_all)
}

#' @export
#' @rdname se-deprecated
do_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("do")
  UseMethod("do_")
}

#' @export
do_.NULL <- function(.data, ..., .dots = list()) {
  NULL
}

#' @export
do_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  do(.data, !!!dots)
}

#' @export
do_.grouped_df <- function(.data, ..., env = caller_env(), .dots = list()) {
  dots <- compat_lazy_dots(.dots, env, ...)
  do(.data, !!!dots)
}

#' @export
do_.rowwise_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  do(.data, !!!dots)
}

#' @export
#' @rdname se-deprecated
filter_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("filter")
  UseMethod("filter_")
}

#' @export
filter_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  filter(.data, !!!dots)
}

#' @export
filter_.tbl_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  filter(.data, !!!dots)
}

#' @export
#' @rdname se-deprecated
#' @inheritParams funs
#' @param env The environment in which functions should be evaluated.
# Deprecated SE version of `funs()`: converts lazyeval dots to quosures and
# forwards to the (itself deprecated) `funs()` constructor.
funs_ <- function(dots, args = list(), env = base_env()) {
  lazy_deprec("funs")
  dots <- compat_lazy_dots(dots, caller_env())
  funs(!!!dots, .args = args)
}

#' @export
#' @rdname se-deprecated
#' @inheritParams group_by
group_by_ <- function(.data, ..., .dots = list(), add = FALSE) {
  lazy_deprec("group_by")
  UseMethod("group_by_")
}

#' @export
# The legacy `add` argument maps onto the modern `.add` argument of
# `group_by()`; `.drop` keeps the data's existing drop default.
group_by_.data.frame <- function(.data, ..., .dots = list(), add = FALSE,
                                 .drop = group_by_drop_default(.data)) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  group_by(.data, !!!dots, .add = add, .drop = .drop)
}

#' @export
group_by_.rowwise_df <- function(.data, ..., .dots = list(), add = FALSE,
                                 .drop = group_by_drop_default(.data)) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  group_by(.data, !!!dots, .add = add, .drop = .drop)
}

#' @export
#' @rdname se-deprecated
# `hint = FALSE`: the programming vignette is not suggested for this verb.
group_indices_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("group_indices", hint = FALSE)
  UseMethod("group_indices_")
}

#' @export
# Note: this is a method of the NSE generic `group_indices()` (not of
# `group_indices_()`), co-located here with the SE shims. With no grouping
# expressions every row belongs to a single group, hence the vector of 1L.
group_indices.data.frame <- function(.data, ..., .drop = TRUE) {
  dots <- enquos(...)
  if (length(dots) == 0L) {
    return(rep(1L, nrow(.data)))
  }
  group_indices(group_by(.data, !!!dots, .drop = .drop))
}

#' @export
group_indices_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  group_indices(.data, !!!dots)
}

#' @export
group_indices_.grouped_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  group_indices(.data, !!!dots)
}

#' @export
group_indices_.rowwise_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  group_indices(.data, !!!dots)
}

#' @export
#' @rdname se-deprecated
mutate_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("mutate")
  UseMethod("mutate_")
}

#' @export
mutate_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  mutate(.data, !!!dots)
}

#' @export
# The tbl_df method additionally auto-names unnamed expressions
# (`.named = TRUE`), matching historical tbl_df behaviour.
mutate_.tbl_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ..., .named = TRUE)
  mutate(.data, !!!dots)
}

#' @rdname se-deprecated
#' @inheritParams tally
#' @export
tally_ <- function(x, wt, sort = FALSE) {
  lazy_deprec("tally")
  wt <- compat_lazy(wt, caller_env())
  tally(x, wt = !!wt, sort = sort)
}

#' @rdname se-deprecated
#' @export
transmute_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("transmute")
  UseMethod("transmute_")
}

#' @export
transmute_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  transmute(.data, !!!dots)
}

#' @rdname se-deprecated
#' @export
rename_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("rename", hint = FALSE)
  UseMethod("rename_")
}

#' @export
rename_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  rename(.data, !!!dots)
}

#' @export
rename_.grouped_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  rename(.data, !!!dots)
}

#' @export
#' @rdname se-deprecated
# Deprecated in favour of tidyselect; warns directly (not via lazy_deprec)
# because the replacement lives in another package.
rename_vars_ <- function(vars, args) {
  lifecycle::deprecate_warn("0.7.0", "rename_vars_()", "tidyselect::vars_rename()", always = TRUE)
  args <- compat_lazy_dots(args, caller_env())
  tidyselect::vars_rename(vars, !!!args)
}

#' @export
#' @rdname se-deprecated
select_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("select", hint = FALSE)
  UseMethod("select_")
}

#' @export
select_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  select(.data, !!!dots)
}

#' @export
select_.grouped_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  select(.data, !!!dots)
}

#' @rdname se-deprecated
#' @param include,exclude Character vector of column names to always
#' include/exclude.
#' @export
# Deprecated in favour of tidyselect::vars_select(); warns directly because
# the replacement lives in another package.
select_vars_ <- function(vars, args, include = chr(), exclude = chr()) {
  lifecycle::deprecate_warn("0.7.0", "select_vars_()", "tidyselect::vars_select()", always = TRUE)
  args <- compat_lazy_dots(args, caller_env())
  tidyselect::vars_select(vars, !!!args, .include = include, .exclude = exclude)
}

#' @export
#' @rdname se-deprecated
slice_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("slice", hint = FALSE)
  UseMethod("slice_")
}

#' @export
slice_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  slice(.data, !!!dots)
}

#' @export
slice_.tbl_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  slice(.data, !!!dots)
}

#' @export
#' @rdname se-deprecated
summarise_ <- function(.data, ..., .dots = list()) {
  lazy_deprec("summarise", hint = FALSE)
  UseMethod("summarise_")
}

#' @export
summarise_.data.frame <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ...)
  summarise(.data, !!!dots)
}

#' @export
# The tbl_df method auto-names unnamed expressions (`.named = TRUE`),
# mirroring mutate_.tbl_df.
summarise_.tbl_df <- function(.data, ..., .dots = list()) {
  dots <- compat_lazy_dots(.dots, caller_env(), ..., .named = TRUE)
  summarise(.data, !!!dots)
}

#' @rdname se-deprecated
#' @export
# American-spelling alias.
summarize_ <- summarise_

#' Summarise and mutate multiple columns.
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' `mutate_each()` and `summarise_each()` are deprecated in favour of
#' the new [across()] function that works within `summarise()` and `mutate()`.
#'
#' @keywords internal
#' @export
summarise_each <- function(tbl, funs, ...) {
  summarise_each_impl(tbl, funs, enquos(...), "summarise_each")
}

#' @export
#' @rdname summarise_each
summarise_each_ <- function(tbl, funs, vars) {
  summarise_each_impl(tbl, funs, vars, "summarise_each_")
}

# Shared worker for summarise_each()/summarise_each_(): warns about the
# deprecation, resolves `vars` (empty selection = all non-grouping columns),
# builds per-column calls via manip_at(), and delegates to summarise().
summarise_each_impl <- function(tbl, funs, vars, name,
                                env = caller_env(), user_env = caller_env(2)) {
  what <- paste0(name, "()")
  lifecycle::deprecate_warn(
    when = "0.7.0",
    what = what,
    with = "across()",
    always = TRUE,
    env = env,
    user_env = user_env
  )
  if (is_empty(vars)) {
    vars <- tbl_nongroup_vars(tbl)
  } else {
    vars <- compat_lazy_dots(vars, user_env)
    vars <- tidyselect::vars_select(tbl_nongroup_vars(tbl), !!!vars)
    # A single variable whose name equals itself needs no name, so the
    # resulting column is not prefixed.
    if (length(vars) == 1 && names(vars) == as_string(vars)) {
      vars <- unname(vars)
    }
  }
  if (is_character(funs)) {
    funs <- funs_(funs)
  }
  funs <- manip_at(tbl, vars, funs, enquo(funs), user_env, .caller = name)
  summarise(tbl, !!!funs)
}

#' @export
#' @rdname summarise_each
mutate_each <- function(tbl, funs, ...) {
  if (is_character(funs)) {
    funs <- funs_(funs)
  }
  mutate_each_impl(tbl, funs, enquos(...), "mutate_each")
}

#' @export
#' @rdname summarise_each
mutate_each_ <- function(tbl, funs, vars) {
  mutate_each_impl(tbl, funs, vars, "mutate_each_")
}

# Shared worker for mutate_each()/mutate_each_(). Parallel to
# summarise_each_impl() except it delegates to mutate().
# NOTE(review): the character-`funs` conversion happens here in
# summarise_each_impl() but in the mutate_each() *wrapper* for this family,
# so mutate_each_() never converts character `funs` while summarise_each_()
# does. Presumably deliberate legacy behaviour — confirm before changing.
mutate_each_impl <- function(tbl, funs, vars, name,
                             env = caller_env(), user_env = caller_env(2)) {
  what <- paste0(name, "()")
  lifecycle::deprecate_warn(
    when = "0.7.0",
    what = what,
    with = "across()",
    always = TRUE,
    env = env,
    user_env = user_env
  )
  if (is_empty(vars)) {
    vars <- tbl_nongroup_vars(tbl)
  } else {
    vars <- compat_lazy_dots(vars, user_env)
    vars <- tidyselect::vars_select(tbl_nongroup_vars(tbl), !!!vars)
    if (length(vars) == 1 && names(vars) == as_string(vars)) {
      vars <- unname(vars)
    }
  }
  funs <- manip_at(tbl, vars, funs, enquo(funs), user_env, .caller = name)
  mutate(tbl, !!!funs)
}

#' @rdname summarise_each
#' @export
summarize_each <- summarise_each

#' @rdname summarise_each
#' @export
summarize_each_ <- summarise_each_
dplyr/R/copy-to.R0000644000176200001440000000442114406402754013331 0ustar 
liggesusers#' Copy a local data frame to a remote src #' #' This function uploads a local data frame into a remote data source, creating #' the table definition as needed. Wherever possible, the new object will be #' temporary, limited to the current connection to the source. #' #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("copy_to")}. #' @param dest remote data source #' @param df local data frame #' @param name name for new remote table. #' @param overwrite If `TRUE`, will overwrite an existing table with #' name `name`. If `FALSE`, will throw an error if `name` already #' exists. #' @param ... other parameters passed to methods. #' @seealso [collect()] for the opposite action; downloading remote data into #' a local dbl. #' @return a `tbl` object in the remote source #' @export #' @examples #' \dontrun{ #' iris2 <- dbplyr::src_memdb() %>% copy_to(iris, overwrite = TRUE) #' iris2 #' } copy_to <- function(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...) { UseMethod("copy_to") } #' Copy tables to same source, if necessary #' #' @param x,y `y` will be copied to `x`, if necessary. #' @param copy If `x` and `y` are not from the same data source, #' and `copy` is `TRUE`, then `y` will be copied into the #' same src as `x`. This allows you to join tables across srcs, but #' it is a potentially expensive operation so you must opt into it. #' @param ... Other arguments passed on to methods. #' @export auto_copy <- function(x, y, copy = FALSE, ...) 
{ if (same_src(x, y)) { return(y) } if (!copy) { bullets <- c( "`x` and `y` must share the same src.", i = cli::format_inline("`x` is {obj_type_friendly(x)}."), i = cli::format_inline("`y` is {obj_type_friendly(y)}."), i = "Set `copy = TRUE` if `y` can be copied to the same source as `x` (may be slow)." ) abort(bullets) } UseMethod("auto_copy") } #' @export auto_copy.data.frame <- function(x, y, copy = FALSE, ...) { as.data.frame(y) } dplyr/R/doc-methods.R0000644000176200001440000000513114366556340014152 0ustar liggesusers# Adapted from sloop methods_generic <- function(x) { # Return early if generic not defined in global environment. This happens # when the documentation is read before the package is attached, or when # previewing development documentation from RStudio, since it renders the # files in a separate session. if (!"package:dplyr" %in% search()) { return(data.frame()) } info <- eval(expr(attr(utils::methods(!!x), "info")), envir = globalenv()) info <- tibble::as_tibble(info, rownames = "method") generic_esc <- gsub("([.\\[])", "\\\\\\1", x) info$class <- gsub(paste0("^", generic_esc, "[.,]"), "", info$method) info$class <- gsub("-method$", "", info$class) info$source <- gsub(paste0(" for ", generic_esc), "", info$from) # Find package methods <- map2( info$generic, info$class, utils::getS3method, optional = TRUE, envir = globalenv() ) envs <- map(methods, environment) info$package <- map_chr(envs, environmentName) # Find help topic, if it exists info$topic <- help_topic(info$method, info$package) # Don't link to self info$topic[info$topic == x] <- NA # Remove spurious matches in base packages like select.list or slice.index base_packages <- c( "base", "compiler", "datasets", "graphics", "grDevices", "grid", "methods", "parallel", "splines", "stats", "stats4", "tcltk", "tools", "utils" ) info <- info[!info$package %in% base_packages, ] info[c("generic", "class", "package", "topic", "visible", "source", "isS4")] } methods_rd <- function(x) { methods <- 
tryCatch(methods_generic(x), error = function(e) data.frame()) if (nrow(methods) == 0) { return("no methods found") } methods <- methods[order(methods$package, methods$class), , drop = FALSE] topics <- unname(split(methods, methods$package)) by_package <- vapply(topics, function(x) { links <- topic_links(x$class, x$package, x$topic) paste0(x$package[[1]], " (", paste0(links, collapse = ", "), ")") }, character(1)) paste0(by_package, collapse = ", ") } topic_links <- function(class, package, topic) { ifelse(is.na(topic), paste0("\\code{", class, "}"), paste0("\\code{\\link[", package, ":", topic, "]{", class, "}}") ) } help_topic <- function(x, pkg) { find_one <- function(topic, pkg) { if (identical(pkg, "")) { return(NA_character_) } path <- system.file("help", "aliases.rds", package = pkg) if (!file.exists(path)) { return(NA_character_) } aliases <- readRDS(path) if (!topic %in% names(aliases)) { return(NA_character_) } aliases[[topic]] } map2_chr(x, pkg, find_one) } dplyr/R/join-cols.R0000644000176200001440000001520214416000530013616 0ustar liggesusersjoin_cols <- function(x_names, y_names, by, ..., suffix = c(".x", ".y"), keep = NULL, error_call = caller_env()) { check_dots_empty0(...) 
if (is_false(keep) && any(by$condition != "==")) { abort( "Can't set `keep = FALSE` when using an inequality, rolling, or overlap join.", call = error_call ) } check_duplicate_vars(x_names, "x", error_call = error_call) check_duplicate_vars(y_names, "y", error_call = error_call) check_join_vars(by$x, x_names, by$condition, "x", error_call = error_call) check_join_vars(by$y, y_names, by$condition, "y", error_call = error_call) suffix <- standardise_join_suffix(suffix, error_call = error_call) x_by <- set_names(match(by$x, x_names), by$x) y_by <- set_names(match(by$y, y_names), by$y) x_loc <- seq_along(x_names) names(x_loc) <- x_names if (is_null(keep)) { # In x_out, equi key variables need to keep the same name, and non-equi # key variables and aux variables need suffixes for duplicates that appear # in y_out. This is equivalent to `keep = TRUE` for the non-equi keys and # `keep = FALSE` for the equi keys. equi <- by$condition == "==" y_aux <- setdiff(y_names, c(by$x[equi], by$y[equi])) x_ignore <- by$x[equi] x_check <- !x_names %in% x_ignore names(x_loc)[x_check] <- add_suffixes(x_names[x_check], c(x_ignore, y_aux), suffix$x) } else if (is_false(keep)) { # In x_out, key variables need to keep the same name, and aux # variables need suffixes for duplicates that appear in y_out y_aux <- setdiff(y_names, c(by$x, by$y)) x_ignore <- by$x x_check <- !x_names %in% x_ignore names(x_loc)[x_check] <- add_suffixes(x_names[x_check], c(x_ignore, y_aux), suffix$x) } else { # In x_out, key variables and aux variables need suffixes # for duplicates that appear in y_out names(x_loc) <- add_suffixes(x_names, y_names, suffix$x) } y_loc <- seq_along(y_names) names(y_loc) <- add_suffixes(y_names, x_names, suffix$y) if (is_null(keep)) { equi <- by$condition == "==" y_ignore <- by$y[equi] y_loc <- y_loc[!y_names %in% y_ignore] } else if (is_false(keep)) { y_ignore <- by$y y_loc <- y_loc[!y_names %in% y_ignore] } # key = named locations to use for matching # out = named locations to use 
in output list( x = list(key = x_by, out = x_loc), y = list(key = y_by, out = y_loc) ) } check_join_vars <- function(vars, names, condition, input, ..., error_call = caller_env()) { check_dots_empty0(...) if (!is.character(vars)) { message <- glue("Join columns in `{input}` must be character vectors.") abort(message, call = error_call) } na <- is.na(vars) if (any(na)) { bullets <- c( glue("Join columns in `{input}` can't be `NA`."), x = glue("Problem at position {err_vars(na)}.") ) abort(bullets, call = error_call) } # Columns are allowed to appear in more than one non-equi condition # (but not in a mix of non-equi and equi conditions). # When non-equi conditions are present, `keep` can't be `FALSE` so we don't # have to worry about merging into the same key column multiple times (#6499). non_equi <- condition != "==" vars <- c(vars[!non_equi], unique(vars[non_equi])) dup <- duplicated(vars) if (any(dup)) { vars <- unique(vars[dup]) bullets <- c( glue("Join columns in `{input}` must be unique."), x = glue("Problem with {err_vars(vars)}.") ) abort(bullets, call = error_call) } missing <- setdiff(vars, names) if (length(missing) > 0) { bullets <- c( glue("Join columns in `{input}` must be present in the data."), x = glue("Problem with {err_vars(missing)}.") ) abort(bullets, call = error_call) } } check_duplicate_vars <- function(vars, input, ..., error_call = caller_env()) { check_dots_empty0(...) dup <- duplicated(vars) if (any(dup)) { bullets <- c( glue("Input columns in `{input}` must be unique."), x = glue("Problem with {err_vars(vars[dup])}.") ) abort(bullets, call = error_call) } } standardise_join_suffix <- function(x, ..., error_call = caller_env()) { check_dots_empty0(...) if (!is.character(x) || length(x) != 2) { bullets <- glue( "`suffix` must be a character vector of length 2, not {obj_type_friendly(x)} of length {length(x)}." 
) abort(bullets, call = error_call) } if (any(is.na(x))) { msg <- glue("`suffix` can't be `NA`.") abort(msg, call = error_call) } list(x = x[[1]], y = x[[2]]) } # `join_cols()` checks that `x` and `y` are individually unique, # which plays into assumptions made here add_suffixes <- function(x, y, suffix) { if (identical(suffix, "")) { return(x) } x <- c(y, x) # Never marks the "first" duplicate (i.e. never anything in `y`) dup <- duplicated(x) while (any(dup)) { x[dup] <- paste0(x[dup], suffix) dup <- duplicated(x) } loc <- seq2(length(y) + 1L, length(x)) x <- x[loc] x } join_cast_common <- function(x, y, vars, error_call = caller_env()) { # Explicit `x/y_arg = ""` to avoid auto naming in `cnd$x_arg` ptype <- try_fetch( vec_ptype2(x, y, x_arg = "", y_arg = "", call = error_call), vctrs_error_incompatible_type = function(cnd) { rethrow_error_join_incompatible_type(cnd, vars, error_call) } ) # Finalize unspecified columns (#6804) ptype <- vec_ptype_finalise(ptype) vec_cast_common(x = x, y = y, .to = ptype, .call = error_call) } rethrow_error_join_incompatible_type <- function(cnd, vars, call) { x_name <- cnd$x_arg y_name <- cnd$y_arg # Remap `y_name` to actual name from `y`. Useful for `join_by(a == b)` # where the name from `x` is used when determining the common type and will # be in the error `cnd`, but we need to tell the user about the name in `y`. 
loc <- match(y_name, names(vars$x$key)) y_name <- names(vars$y$key)[[loc]] x_name <- paste0("x$", x_name) y_name <- paste0("y$", y_name) x_type <- vec_ptype_full(cnd$x) y_type <- vec_ptype_full(cnd$y) stop_join( message = c( glue("Can't join `{x_name}` with `{y_name}` due to incompatible types."), i = glue("`{x_name}` is a <{x_type}>."), i = glue("`{y_name}` is a <{y_type}>.") ), class = "dplyr_error_join_incompatible_type", call = call ) } dplyr/R/by.R0000644000176200001440000001030614416000526012340 0ustar liggesusers#' Helper for consistent documentation of `.by` #' #' Use `@inheritParams args_by` to consistently document `.by`. #' #' @param .by `r lifecycle::badge("experimental")` #' #' <[`tidy-select`][dplyr_tidy_select]> Optionally, a selection of columns to #' group by for just this operation, functioning as an alternative to [group_by()]. For #' details and examples, see [?dplyr_by][dplyr_by]. #' #' @name args_by #' @keywords internal NULL #' Per-operation grouping with `.by`/`by` #' #' ```{r, echo = FALSE, results = "asis"} #' result <- rlang::with_options( #' knitr::knit_child("man/rmd/by.Rmd"), #' dplyr.summarise.inform = TRUE #' ) #' cat(result, sep = "\n") #' ``` #' #' @name dplyr_by NULL compute_by <- function(by, data, ..., by_arg = "by", data_arg = "data", error_call = caller_env()) { check_dots_empty0(...) 
error_call <- dplyr_error_call(error_call) by <- enquo(by) check_by(by, data, by_arg = by_arg, data_arg = data_arg, error_call = error_call) if (is_grouped_df(data)) { type <- "grouped" names <- group_vars(data) data <- group_data(data) } else if (is_rowwise_df(data)) { type <- "rowwise" names <- group_vars(data) data <- group_data(data) } else { if (quo_is_null(by)) { # Much faster than `eval_select_by()` for this common case by <- character() } else { by <- eval_select_by(by, data, error_call = error_call) } if (length(by) == 0L) { # `by = NULL` or empty selection type <- "ungrouped" names <- by data <- group_data(data) data <- dplyr_new_tibble(data, size = vec_size(data)) } else { type <- "grouped" names <- by data <- compute_by_groups(data, by, error_call = error_call) } } new_by(type = type, names = names, data = data) } compute_by_groups <- function(data, names, error_call = caller_env()) { data <- dplyr_col_select(data, names, error_call = error_call) info <- vec_group_loc(data) size <- vec_size(info) out <- dplyr_new_list(info$key) out[[".rows"]] <- new_list_of(info$loc, ptype = integer()) out <- dplyr_new_tibble(out, size = size) out } check_by <- function(by, data, ..., by_arg = "by", data_arg = "data", error_call = caller_env()) { check_dots_empty0(...) if (quo_is_null(by)) { return(invisible(NULL)) } if (is_grouped_df(data)) { message <- paste0( "Can't supply {.arg {by_arg}} when ", "{.arg {data_arg}} is a grouped data frame." ) cli::cli_abort(message, call = error_call) } if (is_rowwise_df(data)) { message <- paste0( "Can't supply {.arg {by_arg}} when ", "{.arg {data_arg}} is a rowwise data frame." 
) cli::cli_abort(message, call = error_call) } invisible(NULL) } eval_select_by <- function(by, data, error_call = caller_env()) { out <- tidyselect::eval_select( expr = by, data = data, allow_rename = FALSE, error_call = error_call ) names(out) } new_by <- function(type, names, data) { structure(list(type = type, names = names, data = data), class = "dplyr_by") } check_by_typo <- function(..., by = NULL, error_call = caller_env()) { check_by_typo_impl( wrong = "by", right = ".by", by = {{ by }}, error_call = error_call ) } check_dot_by_typo <- function(..., .by = NULL, error_call = caller_env()) { check_by_typo_impl( wrong = ".by", right = "by", by = {{ .by }}, error_call = error_call ) } check_by_typo_impl <- function(wrong, right, by = NULL, error_call = caller_env()) { by <- enquo(by) if (quo_is_null(by)) { return(invisible()) } message <- c( "Can't specify an argument named {.code {wrong}} in this verb.", i = "Did you mean to use {.code {right}} instead?" ) cli::cli_abort(message, call = error_call) } dplyr/R/vec-case-when.R0000644000176200001440000001254114406402754014366 0ustar liggesusersvec_case_when <- function(conditions, values, ..., conditions_arg = "conditions", values_arg = "values", default = NULL, default_arg = "default", ptype = NULL, size = NULL, call = current_env()) { check_dots_empty0(...) obj_check_list(conditions, arg = "conditions", call = call) obj_check_list(values, arg = "values", call = call) list_check_all_vectors(values, arg = values_arg, call = call) n_conditions <- length(conditions) n_values <- length(values) if (n_conditions != n_values) { message <- glue( "The number of supplied conditions ({n_conditions}) must equal ", "the number of supplied values ({n_values})." 
) abort(message, call = call) } if (n_conditions == 0L) { abort("At least one condition must be supplied.", call = call) } if (!is_string(conditions_arg)) { abort("`conditions_arg` must be a string.", call = call) } if (!is_string(values_arg)) { abort("`values_arg` must be a string.", call = call) } if (!is_string(default_arg)) { abort("`default_arg` must be a string.", call = call) } condition_args <- names2(conditions) condition_args <- names_as_error_names(condition_args, arg = conditions_arg) value_args <- names2(values) value_args <- names_as_error_names(value_args, arg = values_arg) names(conditions) <- condition_args names(values) <- value_args for (i in seq_len(n_conditions)) { condition <- conditions[[i]] condition_arg <- condition_args[[i]] check_logical(condition, arg = condition_arg, call = call) } size <- vec_size_common( !!!conditions, .size = size, .call = call ) # Allow `default` to participate in common type determination. # In terms of size/ptype behavior it is exactly like any other `values` element. 
# Have to collect inputs and splice them in all at once due to # https://github.com/r-lib/vctrs/issues/1570 extras <- list(default) names(extras) <- default_arg everything <- c(values, extras) ptype <- vec_ptype_common( !!!everything, .ptype = ptype, .call = call ) # Cast early to generate correct error message indices values <- vec_cast_common( !!!values, .to = ptype, .call = call ) if (is.null(default)) { default <- vec_init(ptype) } else { default <- vec_cast( x = default, to = ptype, x_arg = default_arg, call = call ) } # Check for correct sizes for (i in seq_len(n_conditions)) { condition <- conditions[[i]] condition_arg <- condition_args[[i]] vec_check_size(condition, size = size, arg = condition_arg, call = call) } value_sizes <- list_sizes(values) for (i in seq_len(n_values)) { value_size <- value_sizes[[i]] if (value_size != 1L) { value <- values[[i]] value_arg <- value_args[[i]] vec_check_size(value, size = size, arg = value_arg, call = call) } } default_size <- vec_size(default) if (default_size != 1L) { vec_check_size(default, size = size, arg = default_arg, call = call) } n_processed <- 0L locs <- vector("list", n_values) # Starts as unused. Any `TRUE` value in `condition` flips it to used. are_unused <- vec_rep(TRUE, times = size) for (i in seq_len(n_conditions)) { if (!any(are_unused)) { # Early exit if all values are matched, for performance break } condition <- conditions[[i]] # Treat `NA` in `condition` as `FALSE`. # `TRUE & NA == NA`, `FALSE & NA == FALSE`. 
# `which()` drops `NA`s loc <- are_unused & condition loc <- which(loc) locs[[i]] <- loc are_unused[loc] <- FALSE n_processed <- n_processed + 1L } if (n_processed == n_conditions && any(are_unused)) { # If all of the `conditions` are used, then we check if we need `default` loc_unused <- which(are_unused) n_processed <- n_processed + 1L n_values <- n_values + 1L locs[[n_values]] <- loc_unused values[[n_values]] <- default value_sizes[[n_values]] <- default_size } for (i in seq_len(n_processed)) { loc <- locs[[i]] value <- values[[i]] value_size <- value_sizes[[i]] if (value_size == 1L) { # Recycle "up" value <- vec_recycle(value, size = vec_size(loc)) } else { # Slice "down" value <- vec_slice(value, loc) } values[[i]] <- value } # Remove names used for error messages. We don't want them in the result. values <- unname(values) if (n_processed != n_values) { # Trim to only what will be used to fill the result seq_processed <- seq_len(n_processed) values <- values[seq_processed] locs <- locs[seq_processed] } list_unchop( x = values, indices = locs, ptype = ptype ) } names_as_error_names <- function(names, arg = "") { unnamed <- names == "" if (arg == "") { loc_unnamed <- which(unnamed) names[loc_unnamed] <- vec_paste0("..", loc_unnamed) } else { loc_named <- which(!unnamed) loc_unnamed <- which(unnamed) names[loc_named] <- vec_paste0(arg, "$", names[loc_named]) names[loc_unnamed] <- vec_paste0(arg, "[[", loc_unnamed, "]]") } names } vec_paste0 <- function (...) { args <- vec_recycle_common(...) exec(paste0, !!!args) } dplyr/R/nest-by.R0000644000176200001440000000705314366556340013332 0ustar liggesusers#' Nest by one or more variables #' #' @description #' `r lifecycle::badge("experimental")` #' #' `nest_by()` is closely related to [group_by()]. However, instead of storing #' the group structure in the metadata, it is made explicit in the data, #' giving each group key a single row along with a list-column of data frames #' that contain all the other data. 
#' #' `nest_by()` returns a [rowwise] data frame, which makes operations on the #' grouped data particularly elegant. See `vignette("rowwise")` for more #' details. #' #' @details #' Note that `df %>% nest_by(x, y)` is roughly equivalent to #' #' ``` #' df %>% #' group_by(x, y) %>% #' summarise(data = list(pick(everything()))) %>% #' rowwise() #' ``` #' #' If you want to unnest a nested data frame, you can either use #' `tidyr::unnest()` or take advantage of `reframe()`s multi-row behaviour: #' #' ``` #' nested %>% #' reframe(data) #' ``` #' #' @section Lifecycle: #' `nest_by()` is not stable because [`tidyr::nest(.by =)`][tidyr::nest()] #' provides very similar behavior. It may be deprecated in the future. #' #' @return #' A [rowwise] data frame. The output has the following properties: #' #' * The rows come from the underlying [group_keys()]. #' * The columns are the grouping keys plus one list-column of data frames. #' * Data frame attributes are **not** preserved, because `nest_by()` #' fundamentally creates a new data frame. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("nest_by")}. #' #' @inheritParams group_by #' @param .key Name of the list column #' @param .keep Should the grouping columns be kept in the list column. #' @return A tbl with one row per unique combination of the grouping variables. #' The first columns are the grouping variables, followed by a list column of tibbles #' with matching rows of the remaining columns. 
#' @keywords internal
#' @export
#' @examples
#' # After nesting, you get one row per group
#' iris %>% nest_by(Species)
#' starwars %>% nest_by(species)
#'
#' # The output is grouped by row, which makes modelling particularly easy
#' models <- mtcars %>%
#'   nest_by(cyl) %>%
#'   mutate(model = list(lm(mpg ~ wt, data = data)))
#' models
#'
#' models %>% summarise(rsq = summary(model)$r.squared)
#' @examplesIf requireNamespace("broom", quietly = TRUE)
#'
#' # This is particularly elegant with the broom functions
#' models %>% summarise(broom::glance(model))
#' models %>% reframe(broom::tidy(model))
#' @examples
#'
#' # Note that you can also `reframe()` to unnest the data
#' models %>% reframe(data)
nest_by <- function(.data, ..., .key = "data", .keep = FALSE) {
  lifecycle::signal_stage("experimental", "nest_by")
  UseMethod("nest_by")
}

#' @export
# Group by the supplied expressions, then delegate to the grouped_df method
# (which does the actual nesting on the freshly created groups).
nest_by.data.frame <- function(.data, ..., .key = "data", .keep = FALSE) {
  .data <- group_by(.data, ...)
  nest_by.grouped_df(.data, .key = .key, .keep = .keep)
}

#' @export
# An already-grouped data frame must not be re-grouped while nesting: any
# extra arguments are an error.
nest_by.grouped_df <- function(.data, ..., .key = "data", .keep = FALSE) {
  if (!missing(...)) {
    # FIX: the second bullet was missing its closing backtick after
    # `nest_by()`, producing a malformed user-facing message.
    bullets <- c(
      "Can't re-group while nesting",
      i = "Either `ungroup()` first or don't supply arguments to `nest_by()`"
    )
    abort(bullets)
  }
  # One output row per group key; the `.key` list-column holds each group's
  # rows (grouping columns included only when `.keep = TRUE`). The result is
  # rowwise, keyed by the original grouping variables.
  vars <- group_vars(.data)
  keys <- group_keys(.data)
  keys <- mutate(keys, !!.key := group_split(.env$.data, .keep = .keep))
  rowwise(keys, tidyselect::all_of(vars))
}
dplyr/R/import-standalone-lazyeval.R0000644000176200001440000000456714406402754017233 0ustar liggesusers# Standalone file: do not edit by hand
# Source:
# ----------------------------------------------------------------------
#
# ---
# repo: r-lib/rlang
# file: standalone-lazyeval.R
# last-updated: 2018-09-18
# license: https://unlicense.org
# ---
#
# This file serves as a reference for compatibility functions for lazyeval.
#
# nocov start

# Deprecation notice for lazyeval-style inputs. Deliberately disabled: the
# early `return(NULL)` makes the warn() below unreachable.
warn_underscored <- function() {
  return(NULL)
  warn(paste(
    "The underscored versions are deprecated in favour of",
    "tidy evaluation idioms. Please see the documentation",
    "for `quo()` in rlang"
  ))
}

# Deprecation notice for text (string) parsing. Also disabled via the early
# `return(NULL)`.
warn_text_se <- function() {
  return(NULL)
  warn("Text parsing is deprecated, please supply an expression or formula")
}

# Convert one lazyeval-style input (quosure, formula, symbol, call, string,
# scalar literal, or a `lazy` object) to a quosure scoped in `env`.
# Aborts for any other type.
compat_lazy <- function(lazy, env = caller_env(), warn = TRUE) {
  if (warn) warn_underscored()
  if (missing(lazy)) {
    # Missing input becomes an empty quosure.
    return(quo())
  }
  if (is_quosure(lazy)) {
    return(lazy)
  }
  if (is_formula(lazy)) {
    return(as_quosure(lazy, env))
  }
  out <- switch(typeof(lazy),
    symbol = ,
    language = new_quosure(lazy, env),
    character = {
      if (warn) warn_text_se()
      # Only the first string is parsed; extra elements are ignored here.
      parse_quo(lazy[[1]], env)
    },
    logical = ,
    integer = ,
    double = {
      if (length(lazy) > 1) {
        warn("Truncating vector to length 1")
        lazy <- lazy[[1]]
      }
      new_quosure(lazy, env)
    },
    list = if (inherits(lazy, "lazy")) {
      # NOTE(review): `=` here is assignment-used-as-expression; the branch's
      # value is the new quosure. Kept as-is — this standalone file is synced
      # from r-lib/rlang and must not be hand-edited.
      lazy = new_quosure(lazy$expr, lazy$env)
    }
  )
  if (is_null(out)) {
    # Reached for unsupported types and for non-`lazy` lists.
    abort(sprintf("Can't convert a %s to a quosure", typeof(lazy)))
  } else {
    out
  }
}

# Convert a collection of lazyeval-style inputs (plus any inputs in `...`)
# to a named list of quosures. Only the first element triggers the
# deprecation warning. With `.named = TRUE`, unnamed elements are named
# after their deparsed expressions.
compat_lazy_dots <- function(dots, env, ..., .named = FALSE) {
  if (missing(dots)) {
    dots <- list()
  }
  if (inherits(dots, c("lazy", "formula"))) {
    # A single lazy/formula counts as one element, not a collection.
    dots <- list(dots)
  } else {
    dots <- unclass(dots)
  }
  dots <- c(dots, list(...))
  warn <- TRUE
  for (i in seq_along(dots)) {
    dots[[i]] <- compat_lazy(dots[[i]], env, warn)
    warn <- FALSE
  }
  named <- have_name(dots)
  if (.named && any(!named)) {
    nms <- vapply(dots[!named], function(x) expr_text(get_expr(x)), character(1))
    names(dots)[!named] <- nms
  }
  names(dots) <- names2(dots)
  dots
}

# Inverse direction: wrap a quosure as a lazyeval `lazy` object.
compat_as_lazy <- function(quo) {
  structure(class = "lazy", list(
    expr = get_expr(quo),
    env = get_env(quo)
  ))
}

# Capture `...` as quosures and convert each to a `lazy`, returning a
# `lazy_dots` object.
compat_as_lazy_dots <- function(...) {
  structure(class = "lazy_dots", lapply(quos(...), compat_as_lazy))
}

# nocov end
dplyr/R/slice.R0000644000176200001440000004213114525503021013035 0ustar liggesusers#' Subset rows using their positions
#'
#' @description
#' `slice()` lets you index rows by their (integer) locations.
It allows you
#' to select, remove, and duplicate rows. It is accompanied by a number of
#' helpers for common use cases:
#'
#' * `slice_head()` and `slice_tail()` select the first or last rows.
#' * `slice_sample()` randomly selects rows.
#' * `slice_min()` and `slice_max()` select rows with the smallest or largest
#'   values of a variable.
#'
#' If `.data` is a [grouped_df], the operation will be performed on each group,
#' so that (e.g.) `slice_head(df, n = 5)` will select the first five rows in
#' each group.
#'
#' @details
#' Slice does not work with relational databases because they have no
#' intrinsic notion of row order. If you want to perform the equivalent
#' operation, use [filter()] and [row_number()].
#'
#' @family single table verbs
#' @inheritParams args_by
#' @inheritParams arrange
#' @inheritParams filter
#' @param ... For `slice()`: <[`data-masking`][rlang::args_data_masking]>
#'   Integer row values.
#'
#'   Provide either positive values to keep, or negative values to drop.
#'   The values provided must be either all positive or all negative.
#'   Indices beyond the number of rows in the input are silently ignored.
#'
#'   For `slice_*()`, these arguments are passed on to methods.
#'
#' @param n,prop Provide either `n`, the number of rows, or `prop`, the
#'   proportion of rows to select. If neither is supplied, `n = 1` will be
#'   used. If `n` is greater than the number of rows in the group
#'   (or `prop > 1`), the result will be silently truncated to the group size.
#'   `prop` will be rounded towards zero to generate an integer number of
#'   rows.
#'
#'   A negative value of `n` or `prop` will be subtracted from the group
#'   size. For example, `n = -2` with a group of 5 rows will select 5 - 2 = 3
#'   rows; `prop = -0.25` with 8 rows will select 8 * (1 - 0.25) = 6 rows.
#' @return
#' An object of the same type as `.data`. The output has the following
#' properties:
#'
#' * Each row may appear 0, 1, or many times in the output.
#' * Columns are not modified.
#' * Groups are not modified.
#' * Data frame attributes are preserved.
#' @section Methods:
#' These functions are **generics**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' Methods available in currently loaded packages:
#'
#' * `slice()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
#' * `slice_head()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
#' * `slice_tail()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
#' * `slice_min()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
#' * `slice_max()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
#' * `slice_sample()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
#' @export
#' @examples
#' # Similar to head(mtcars, 1):
#' mtcars %>% slice(1L)
#' # Similar to tail(mtcars, 1):
#' mtcars %>% slice(n())
#' mtcars %>% slice(5:n())
#' # Rows can be dropped with negative indices:
#' slice(mtcars, -(1:4))
#'
#' # First and last rows based on existing order
#' mtcars %>% slice_head(n = 5)
#' mtcars %>% slice_tail(n = 5)
#'
#' # Rows with minimum and maximum values of a variable
#' mtcars %>% slice_min(mpg, n = 5)
#' mtcars %>% slice_max(mpg, n = 5)
#'
#' # slice_min() and slice_max() may return more rows than requested
#' # in the presence of ties.
#' mtcars %>% slice_min(cyl, n = 1)
#' # Use with_ties = FALSE to return exactly n matches
#' mtcars %>% slice_min(cyl, n = 1, with_ties = FALSE)
#' # Or use additional variables to break the tie:
#' mtcars %>% slice_min(tibble(cyl, mpg), n = 1)
#'
#' # slice_sample() allows you to randomly select with or without replacement
#' mtcars %>% slice_sample(n = 5)
#' mtcars %>% slice_sample(n = 5, replace = TRUE)
#'
#' # you can optionally weight by a variable - this code weights by the
#' # physical weight of the cars, so heavy cars are more likely to get
#' # selected
#' mtcars %>% slice_sample(weight_by = wt, n = 5)
#'
#' # Group wise operation ----------------------------------------
#' df <- tibble(
#'   group = rep(c("a", "b", "c"), c(1, 2, 4)),
#'   x = runif(7)
#' )
#'
#' # All slice helpers operate per group, silently truncating to the group
#' # size, so the following code works without error
#' df %>% group_by(group) %>% slice_head(n = 2)
#'
#' # When specifying the proportion of rows to include non-integer sizes
#' # are rounded down, so group a gets 0 rows
#' df %>% group_by(group) %>% slice_head(prop = 0.5)
#'
#' # Filter equivalents --------------------------------------------
#' # slice() expressions can often be written to use `filter()` and
#' # `row_number()`, which can also be translated to SQL. For many databases,
#' # you'll need to supply an explicit variable to use to compute the row number.
#' filter(mtcars, row_number() == 1L)
#' filter(mtcars, row_number() == n())
#' filter(mtcars, between(row_number(), 5, n()))
slice <- function(.data, ..., .by = NULL, .preserve = FALSE) {
  check_by_typo(...)
  by <- enquo(.by)
  # `.by` and `.preserve` configure incompatible grouping behaviours.
  if (!quo_is_null(by) && !is_false(.preserve)) {
    abort("Can't supply both `.by` and `.preserve`.")
  }
  UseMethod("slice")
}

#' @export
slice.data.frame <- function(.data, ..., .by = NULL, .preserve = FALSE) {
  check_dots_unnamed()
  dots <- enquos(...)
  by <- compute_by(
    by = {{ .by }},
    data = .data,
    # `the$slice_by_arg` lets slice_head() etc. report "by" instead of ".by"
    # in error messages.
    by_arg = the$slice_by_arg,
    data_arg = ".data"
  )
  loc <- slice_rows(.data, dots, by)
  dplyr_row_slice(.data, loc, preserve = .preserve)
}

#' @export
#' @rdname slice
slice_head <- function(.data, ..., n, prop, by = NULL) {
  check_dot_by_typo(...)
  check_slice_unnamed_n_prop(..., n = n, prop = prop)
  UseMethod("slice_head")
}

#' @export
slice_head.data.frame <- function(.data, ..., n, prop, by = NULL) {
  check_dots_empty0(...)
  # `size` is a function of the group size n, built once from n/prop.
  size <- get_slice_size(n = n, prop = prop)
  idx <- function(n) {
    seq2(1, size(n))
  }
  dplyr_local_error_call()
  dplyr_local_slice_by_arg("by")
  slice(.data, idx(dplyr::n()), .by = {{ by }})
}

#' @export
#' @rdname slice
slice_tail <- function(.data, ..., n, prop, by = NULL) {
  check_dot_by_typo(...)
  check_slice_unnamed_n_prop(..., n = n, prop = prop)
  UseMethod("slice_tail")
}

#' @export
slice_tail.data.frame <- function(.data, ..., n, prop, by = NULL) {
  check_dots_empty0(...)
  size <- get_slice_size(n = n, prop = prop)
  # Last `size(n)` positions of each group.
  idx <- function(n) {
    seq2(n - size(n) + 1, n)
  }
  dplyr_local_error_call()
  dplyr_local_slice_by_arg("by")
  slice(.data, idx(dplyr::n()), .by = {{ by }})
}

#' @export
#' @rdname slice
#' @param order_by <[`data-masking`][rlang::args_data_masking]> Variable or
#'   function of variables to order by. To order by multiple variables, wrap
#'   them in a data frame or tibble.
#' @param with_ties Should ties be kept together? The default, `TRUE`,
#'   may return more rows than you request. Use `FALSE` to ignore ties,
#'   and return the first `n` rows.
#' @param na_rm Should missing values in `order_by` be removed from the result?
#'   If `FALSE`, `NA` values are sorted to the end (like in [arrange()]), so
#'   they will only be included if there are insufficient non-missing values to
#'   reach `n`/`prop`.
slice_min <- function(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE) {
  check_required(order_by)
  check_dot_by_typo(...)
  check_slice_unnamed_n_prop(..., n = n, prop = prop)
  check_bool(with_ties)
  check_bool(na_rm)
  UseMethod("slice_min")
}

#' @export
slice_min.data.frame <- function(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE) {
  check_dots_empty0(...)
  size <- get_slice_size(n = n, prop = prop)
  dplyr_local_error_call()
  dplyr_local_slice_by_arg("by")
  slice(
    .data,
    .by = {{ by }},
    # Evaluated per group in the data mask; ranks ascending so the smallest
    # values are kept.
    local({
      n <- dplyr::n()
      order_by <- {{ order_by }}
      vec_check_size(order_by, size = n)
      slice_rank_idx(
        order_by,
        size(n),
        direction = "asc",
        with_ties = !!with_ties,
        na_rm = !!na_rm
      )
    })
  )
}

#' @export
#' @rdname slice
slice_max <- function(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE) {
  check_required(order_by)
  check_dot_by_typo(...)
  check_slice_unnamed_n_prop(..., n = n, prop = prop)
  check_bool(with_ties)
  check_bool(na_rm)
  UseMethod("slice_max")
}

#' @export
slice_max.data.frame <- function(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE) {
  check_dots_empty0(...)
  size <- get_slice_size(n = n, prop = prop)
  dplyr_local_error_call()
  dplyr_local_slice_by_arg("by")
  slice(
    .data,
    .by = {{ by }},
    # Same as slice_min() but ranks descending.
    local({
      n <- dplyr::n()
      order_by <- {{ order_by }}
      vec_check_size(order_by, size = n)
      slice_rank_idx(
        order_by,
        size(n),
        direction = "desc",
        with_ties = !!with_ties,
        na_rm = !!na_rm
      )
    })
  )
}

#' @export
#' @rdname slice
#' @param replace Should sampling be performed with (`TRUE`) or without
#'   (`FALSE`, the default) replacement.
#' @param weight_by <[`data-masking`][rlang::args_data_masking]> Sampling
#'   weights. This must evaluate to a vector of non-negative numbers the same
#'   length as the input. Weights are automatically standardised to sum to 1.
slice_sample <- function(.data, ..., n, prop, by = NULL, weight_by = NULL, replace = FALSE) {
  check_dot_by_typo(...)
  check_slice_unnamed_n_prop(..., n = n, prop = prop)
  check_bool(replace)
  UseMethod("slice_sample")
}

#' @export
slice_sample.data.frame <- function(.data, ..., n, prop, by = NULL, weight_by = NULL, replace = FALSE) {
  check_dots_empty0(...)
  # With replacement, more rows than the group size may be requested.
  size <- get_slice_size(n = n, prop = prop, allow_outsize = replace)
  dplyr_local_error_call()
  dplyr_local_slice_by_arg("by")
  slice(
    .data,
    .by = {{ by }},
    local({
      weight_by <- {{ weight_by }}
      n <- dplyr::n()
      if (!is.null(weight_by)) {
        vec_check_size(weight_by, size = n)
      }
      sample_int(n, size(n), replace = !!replace, wt = weight_by)
    })
  )
}

# helpers -----------------------------------------------------------------

# Evaluate the slice expressions per group and combine them into one
# integer location vector for `dplyr_row_slice()`.
slice_rows <- function(data, dots, by, error_call = caller_env(), user_env = caller_env(2)) {
  error_call <- dplyr_error_call(error_call)
  mask <- DataMask$new(data, by, "slice", error_call = error_call)
  on.exit(mask$forget(), add = TRUE)
  chunks <- slice_eval(mask, dots, error_call = error_call, user_env = user_env)
  slice_indices <- slice_combine(chunks, dots, mask = mask, error_call = error_call)
  vec_c(!!!slice_indices, .ptype = integer())
}

# TRUE unless the error call clearly comes from a non-`slice()` generic.
is_slice_call <- function(error_call) {
  is_slice <- TRUE
  if (is_environment(error_call) && !identical(error_call$.Generic, "slice")) {
    is_slice <- FALSE
  }
  is_slice
}

# Evaluate all slice dots inside the mask, coercing each result to an
# integer subscript and chaining any error to the `slice()` call.
slice_eval <- function(mask, dots, error_call = caller_env(), user_env = caller_env(2)) {
  index <- 0L
  impl <- function(...) {
    n <- ...length2()
    out <- vector("list", n)
    for (i in seq_len(n)) {
      index <<- i
      slice_idx <- ...elt2(i)
      if (is.matrix(slice_idx) && ncol(slice_idx) == 1) {
        lifecycle::deprecate_warn(
          when = "1.1.0",
          what = I("Slicing with a 1-column matrix"),
          env = error_call,
          user_env = user_env
        )
        slice_idx <- slice_idx[, 1]
      }
      out[[i]] <- vec_as_subscript(
        slice_idx,
        logical = "error",
        character = "error",
        arg = as_label(dots[[i]]),
        call = NULL # error always chained to slice()
      )
    }
    index <<- 0L
    vec_c(!!!out, .ptype = integer())
  }
  withCallingHandlers(
    mask$eval_all(quo(impl(!!!dots))),
    error = function(cnd) {
      if (inherits(cnd, "vctrs_error_subscript")) {
        action <- "process"
      } else {
        action <- "compute"
      }
      if (index && is_slice_call(error_call)) {
        local_error_context(dots, index, mask = mask)
        header <- cnd_bullet_header(action)
      } else {
        header <- glue("Can't {action} indices.")
      }
      bullets <- c(header, i = cnd_bullet_cur_group_label())
      abort(bullets, call = error_call, parent = cnd)
    }
  )
}

# Translate per-group subscripts into absolute row locations, dropping
# zeros, out-of-bounds and missing indices.
slice_combine <- function(chunks, dots, mask, error_call = caller_env()) {
  rows <- mask$get_rows()
  slice_indices <- new_list(length(rows))
  withCallingHandlers(
    for (group in seq_along(rows)) {
      current_rows <- rows[[group]]
      loc <- num_as_location(
        i = chunks[[group]],
        n = length(current_rows),
        zero = "remove",
        oob = "remove",
        missing = "remove",
        arg = as_label(dots[[group]]),
        call = NULL # error always chained to slice()
      )
      grp_loc <- current_rows[loc]
      grp_loc <- grp_loc[!is.na(grp_loc)]
      slice_indices[[group]] <- grp_loc
    },
    error = function(cnd) {
      mask$set_current_group(group)
      bullets <- c(
        "Can't compute indices.",
        i = cnd_bullet_cur_group_label()
      )
      abort(bullets, call = error_call, parent = cnd)
    }
  )
  slice_indices
}

# Force `x`, re-signalling any evaluation error as "`name` must be a constant".
check_constant <- function(x, name, error_call = caller_env()) {
  withCallingHandlers(force(x), error = function(e) {
    bullets <- c(
      glue("`{name}` must be a constant.")
    )
    abort(bullets, parent = e, call = error_call)
  })
}

check_slice_unnamed_n_prop <- function(..., n, prop, error_call = caller_env()) {
  if (!missing(n) || !missing(prop)) {
    return(invisible())
  }
  # Special case to capture e.g. `slice_head(2)`
  # Capture dots as quosures so that we can label
  dots <- enquos(...)
  if (length(dots) == 1L && names2(dots)[[1L]] == "") {
    slice_call <- frame_call(frame = error_call)[[1]]
    slice_call <- as_label(slice_call)
    bullets <- c(
      "`n` must be explicitly named.",
      i = glue("Did you mean `{slice_call}(n = {as_label(dots[[1]])})`?")
    )
    abort(bullets, call = error_call)
  }
  invisible()
}

# Validate the mutually exclusive `n`/`prop` arguments, defaulting to n = 1.
check_slice_n_prop <- function(n, prop, error_call = caller_env()) {
  if (missing(n) && missing(prop)) {
    list(type = "n", n = 1L)
  } else if (!missing(n) && missing(prop)) {
    n <- check_constant(n, "n", error_call = error_call)
    if (!is_integerish(n, n = 1) || is.na(n)) {
      abort(
        glue("`n` must be a round number, not {obj_type_friendly(n)}."),
        call = error_call
      )
    }
    list(type = "n", n = n)
  } else if (!missing(prop) && missing(n)) {
    prop <- check_constant(prop, "prop", error_call = error_call)
    if (!is.numeric(prop) || length(prop) != 1 || is.na(prop)) {
      abort(
        glue("`prop` must be a number, not {obj_type_friendly(prop)}."),
        call = error_call
      )
    }
    list(type = "prop", prop = prop)
  } else {
    abort("Must supply `n` or `prop`, but not both.", call = error_call)
  }
}

# Always returns an integer between 0 and the group size
get_slice_size <- function(n, prop, allow_outsize = FALSE, error_call = caller_env()) {
  slice_input <- check_slice_n_prop(n, prop, error_call = error_call)
  if (slice_input$type == "n") {
    if (slice_input$n >= 0) {
      if (allow_outsize) {
        body <- expr(!!floor(slice_input$n))
      } else {
        body <- expr(clamp(0, !!floor(slice_input$n), n))
      }
    } else {
      # Negative n: subtract from the group size.
      body <- expr(clamp(0, ceiling(n + !!slice_input$n), n))
    }
  } else if (slice_input$type == "prop") {
    if (slice_input$prop >= 0) {
      if (allow_outsize) {
        body <- expr(floor(!!slice_input$prop * n))
      } else {
        body <- expr(clamp(0, floor(!!slice_input$prop * n), n))
      }
    } else {
      body <- expr(clamp(0, ceiling(n + !!slice_input$prop * n), n))
    }
  }
  # Build a one-argument function of the group size n.
  new_function(pairlist2(n = ), body)
}

clamp <- function(min, x, max) {
  if (x < min) {
    min
  } else if (x > max) {
    max
  } else {
    x
  }
}

# `sample.int()` errors when size is 0 and wt is supplied; short-circuit.
sample_int <- function(n, size, replace = FALSE, wt = NULL) {
  if (size == 0L) {
    integer(0)
  } else {
    sample.int(n, size, prob = wt, replace = replace)
  }
}

# Return the locations of the `size` lowest/highest-ranked values,
# ordered by rank, optionally dropping incomplete (missing) values.
slice_rank_idx <- function(
    order_by,
    size,
    with_ties = TRUE,
    direction = c("asc", "desc"),
    na_rm = FALSE,
    call = caller_env()) {
  direction <- arg_match0(
    arg = direction,
    values = c("asc", "desc"),
    error_call = call
  )
  # puts missing values at the end
  na_value <- if (direction == "asc") "largest" else "smallest"
  ties <- if (with_ties) "min" else "sequential"
  ranks <- vec_rank(
    x = order_by,
    ties = ties,
    direction = direction,
    na_value = na_value
  )
  keep <- ranks <= size
  if (na_rm) {
    keep[!vec_detect_complete(order_by)] <- FALSE
  }
  which <- which(keep)
  which[order(ranks[which])]
}

on_load({
  # Default used by `slice()`
  the$slice_by_arg <- ".by"
})

dplyr_local_slice_by_arg <- function(by_arg, frame = caller_env()) {
  local_bindings(slice_by_arg = by_arg, .env = the, .frame = frame)
}

# Backports for R 3.5.0 utils
...length2 <- function(frame = caller_env()) {
  dots <- env_get(frame, "...")
  if (is_missing(dots)) {
    0L
  } else {
    length(dots)
  }
}

...elt2 <- function(i, frame = caller_env()) {
  eval_bare(sym(paste0("..", i)), frame)
}
dplyr/R/pull.R0000644000176200001440000000422714267750476012731 0ustar liggesusers#' Extract a single column
#'
#' `pull()` is similar to `$`. It's mostly useful because it looks a little
#' nicer in pipes, it also works with remote data frames, and it can optionally
#' name the output.
#'
#' @inheritParams arrange
#' @inheritParams tidyselect::vars_pull
#' @param name An optional parameter that specifies the column to be used
#'   as names for a named vector. Specified in a similar manner as \code{var}.
#' @param ... For use by methods.
#' @return A vector the same size as `.data`.
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}.
#' @export
#' @examples
#' mtcars %>% pull(-1)
#' mtcars %>% pull(1)
#' mtcars %>% pull(cyl)
#' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)
#'
#' # Also works for remote sources
#' df <- dbplyr::memdb_frame(x = 1:10, y = 10:1, .name = "pull-ex")
#' df %>%
#'   mutate(z = x * y) %>%
#'   pull()
#' @examples
#'
#' # Pull a named vector
#' starwars %>% pull(height, name)
pull <- function(.data, var = -1, name = NULL, ...) {
  check_dots_used()
  UseMethod("pull")
}

#' @export
pull.data.frame <- function(.data, var = -1, name = NULL, ...) {
  var <- tidyselect::vars_pull(names(.data), !!enquo(var))
  name <- enquo(name)
  if (quo_is_null(name)) {
    return(.data[[var]])
  }
  # Name the output vector after a second selected column.
  name <- tidyselect::vars_pull(names(.data), !!name)
  set_names(.data[[var]], nm = .data[[name]])
}

# Resolve `expr` (evaluated against the column positions) to a column name;
# negative values count from the end.
find_var <- function(expr, vars) {
  var_env <- set_names(as.list(seq_along(vars)), vars)
  var <- eval_tidy(expr, var_env)
  if (!is.numeric(var) || length(var) != 1) {
    abort("`var` must evaluate to a single number.")
  }
  var <- as.integer(var)
  n <- length(vars)
  if (is.na(var) || abs(var) > n || var == 0L) {
    # FIX: the message was passed to abort() as a plain string, so the
    # `{-n}`, `{n}` and `{var}` placeholders printed literally; wrap in
    # glue() to interpolate them.
    abort(glue("`var` must be a value between {-n} and {n} (excluding zero), not {var}."))
  }
  if (var < 0) {
    var <- var + n + 1
  }
  vars[[var]]
}
dplyr/R/near.R0000644000176200001440000000066213663216626012674 0ustar liggesusers#' Compare two numeric vectors
#'
#' This is a safe way of comparing if two vectors of floating point numbers
#' are (pairwise) equal. This is safer than using `==`, because it has
#' a built in tolerance
#'
#' @param x,y Numeric vectors to compare
#' @param tol Tolerance of comparison.
#' @export
#' @examples
#' sqrt(2) ^ 2 == 2
#' near(sqrt(2) ^ 2, 2)
near <- function(x, y, tol = .Machine$double.eps^0.5) {
  # Pairwise absolute difference; TRUE where it falls inside the tolerance.
  delta <- abs(x - y)
  delta < tol
}
dplyr/R/compute-collect.R0000644000176200001440000000423414406402754015040 0ustar liggesusers#' Force computation of a database query
#'
#' @description
#' `compute()` stores results in a remote temporary table.
#' `collect()` retrieves data into a local tibble.
#' `collapse()` is slightly different: it doesn't force computation, but
#' instead forces generation of the SQL query. This is sometimes needed to work
#' around bugs in dplyr's SQL generation.
#'
#' All functions preserve grouping and ordering.
#'
#' @section Methods:
#' These functions are **generics**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' Methods available in currently loaded packages:
#' * `compute()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("compute")}
#' * `collect()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collect")}
#' * `collapse()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("collapse")}
#'
#' @param x A data frame, data frame extension (e.g. a tibble), or a lazy
#'   data frame (e.g. from dbplyr or dtplyr). See *Methods*, below, for more
#'   details.
#' @param ... Arguments passed on to methods
#' @seealso [copy_to()], the opposite of `collect()`: it takes a local data
#'   frame and uploads it to the remote source.
#' @export
#' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)
#' mtcars2 <- dbplyr::src_memdb() %>%
#'   copy_to(mtcars, name = "mtcars2-cc", overwrite = TRUE)
#'
#' remote <- mtcars2 %>%
#'   filter(cyl == 8) %>%
#'   select(mpg:drat)
#'
#' # Compute query and save in remote table
#' compute(remote)
#'
#' # Compute query bring back to this session
#' collect(remote)
#'
#' # Creates a fresh query based on the generated SQL
#' collapse(remote)
compute <- function(x, ...) {
  UseMethod("compute")
}

#' @export
#' @rdname compute
collect <- function(x, ...) {
  UseMethod("collect")
}

#' @export
#' @rdname compute
collapse <- function(x, ...) {
  UseMethod("collapse")
}

# Local data frames are already materialised in this session, so the
# data.frame methods are identity functions.
#' @export
collect.data.frame <- function(x, ...) {
  x
}

#' @export
compute.data.frame <- function(x, ...) {
  x
}

#' @export
collapse.data.frame <- function(x, ...) {
  x
}
dplyr/R/deprec-funs.R0000644000176200001440000000441014406402754014150 0ustar liggesusers#' Create a list of function calls
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' `funs()` is deprecated; please use `list()` instead. We deprecated this
#' function because it provided a unique way of specifying anonymous functions,
#' rather than adopting the conventions used by purrr and other packages
#' in the tidyverse.
#'
#' @param ... <[`data-masking`][rlang::args_data_masking]> A list of functions
#'   specified by:
#'
#'   - Their name, `"mean"`
#'   - The function itself, `mean`
#'   - A call to the function with `.` as a dummy argument,
#'     `mean(., na.rm = TRUE)`
#'
#'   The following notations are **not** supported, see examples:
#'
#'   - An anonymous function, `function(x) mean(x, na.rm = TRUE)`
#'   - An anonymous function in \pkg{purrr} notation, `~mean(., na.rm = TRUE)`
#'
#' @param .args,args A named list of additional arguments to be added to all
#'   function calls.
As `funs()` is being deprecated, use other methods to
#'   supply arguments: `...` argument in [scoped verbs][summarise_at()] or make
#'   own functions with [purrr::partial()].
#' @export
#' @keywords internal
#' @examples
#' funs("mean", mean(., na.rm = TRUE))
#' # ->
#' list(mean = mean, mean = ~ mean(.x, na.rm = TRUE))
#'
#' funs(m1 = mean, m2 = "mean", m3 = mean(., na.rm = TRUE))
#' # ->
#' list(m1 = mean, m2 = "mean", m3 = ~ mean(.x, na.rm = TRUE))
funs <- function(..., .args = list()) {
  # Deprecated since 0.8.0; always warns with migration advice.
  lifecycle::deprecate_warn("0.8.0", "funs()", always = TRUE, details = paste_line(
    "Please use a list of either functions or lambdas: ",
    "",
    "  # Simple named list: ",
    "  list(mean = mean, median = median)",
    "",
    "  # Auto named with `tibble::lst()`: ",
    "  tibble::lst(mean, median)",
    "",
    "  # Using lambdas",
    "  list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))"
  ))
  # Capture the dots as quosures and convert each to a function, merging in
  # the extra `.args`.
  dots <- enquos(...)
  default_env <- caller_env()
  error_call <- current_env()
  funs <- map(dots, function(quo) as_fun(quo, default_env, .args, error_call = error_call))
  new_funs(funs)
}

# Build the classed "fun_list" object returned by funs(): auto-names the
# elements from their expressions and records whether any were user-named.
new_funs <- function(funs) {
  attr(funs, "have_name") <- any(names2(funs) != "")

  # Workaround until rlang:::label() is exported
  temp <- map(funs, function(fn) node_car(quo_get_expr(fn)))
  temp <- exprs_auto_name(temp)
  names(funs) <- names(temp)

  class(funs) <- "fun_list"
  funs
}
dplyr/R/progress.R0000644000176200001440000001035414366556340013613 0ustar liggesusers#' Progress bar with estimated time.
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' This progress bar has been deprecated since providing progress bars is not
#' the responsibility of dplyr. Instead, you might try the more powerful
#' [progress](https://github.com/r-lib/progress) package.
#'
#' This reference class represents a text progress bar displayed estimated
#' time remaining. When finished, it displays the total duration. The
#' automatic progress bar can be disabled by setting option
#' `dplyr.show_progress` to `FALSE`.
#'
#' @param n Total number of items
#' @param min_time Progress bar will wait until at least `min_time`
#'   seconds have elapsed before displaying any results.
#' @return A ref class with methods `tick()`, `print()`,
#'   `pause()`, and `stop()`.
#' @keywords internal
#' @export
#' @examples
#' p <- progress_estimated(3)
#' p$tick()
#' p$tick()
#' p$tick()
#'
#' p <- progress_estimated(3)
#' for (i in 1:3) p$pause(0.1)$tick()$print()
#'
#' p <- progress_estimated(3)
#' p$tick()$print()$
#'   pause(1)$stop()
#'
#' # If min_time is set, progress bar not shown until that many
#' # seconds have elapsed
#' p <- progress_estimated(3, min_time = 3)
#' for (i in 1:3) p$pause(0.1)$tick()$print()
#'
#' \dontrun{
#' p <- progress_estimated(10, min_time = 3)
#' for (i in 1:10) p$pause(0.5)$tick()$print()
#' }
progress_estimated <- function(n, min_time = 0) {
  # Before 1.1.0 was soft deprecated; so doesn't get always = TRUE until 1.2.0
  lifecycle::deprecate_warn("1.0.0", "dplyr::progress_estimated()")
  Progress$new(n, min_time = min_time)
}

# Mutable R6 class backing progress_estimated(). All methods return `self`
# (or invisible(self)) so calls can be chained: p$tick()$print().
Progress <- R6::R6Class("Progress",
  public = list(
    n = NULL,           # total number of ticks expected
    i = 0,              # ticks seen so far
    init_time = NULL,   # time at begin()
    stopped = FALSE,
    stop_time = NULL,
    min_time = NULL,    # don't display before this many seconds have passed
    last_update = NULL, # throttles redraws in print()

    initialize = function(n, min_time = 0, ...) {
      self$n <- n
      self$min_time <- min_time
      self$begin()
    },

    begin = function() {
      "Initialise timer. Call this before beginning timing."
      self$i <- 0
      self$last_update <- self$init_time <- now()
      self$stopped <- FALSE
      self
    },

    pause = function(x) {
      "Sleep for x seconds. Useful for testing."
      Sys.sleep(x)
      self
    },

    # Columns available for the bar itself, after reserving room for the
    # "|100% ~ 99.9 h remaining" suffix.
    width = function() {
      getOption("width") - nchar("|100% ~ 99.9 h remaining") - 2
    },

    tick = function() {
      "Process one element"
      if (self$stopped) return(self)
      if (self$i == self$n) abort("No more ticks.")
      self$i <- self$i + 1
      self
    },

    stop = function() {
      if (self$stopped) return(self)
      self$stopped <- TRUE
      self$stop_time <- now()
      self
    },

    print = function(...) {
      # Silently do nothing when progress display is disabled or unwanted.
      if (!isTRUE(getOption("dplyr.show_progress")) || # user specifies no progress
        !interactive() || # not an interactive session
        !is.null(getOption("knitr.in.progress"))) { # dplyr used within knitr document
        return(invisible(self))
      }

      now_ <- now()
      # Respect min_time, and redraw at most every 0.05 s.
      if (now_ - self$init_time < self$min_time || now_ - self$last_update < 0.05) {
        return(invisible(self))
      }
      self$last_update <- now_

      if (self$stopped) {
        overall <- show_time(self$stop_time - self$init_time)
        if (self$i == self$n) {
          cat_line("Completed after ", overall)
          cat("\n")
        } else {
          cat_line("Killed after ", overall)
          cat("\n")
        }
        return(invisible(self))
      }

      # Estimate remaining time from the average time per tick so far.
      avg <- (now() - self$init_time) / self$i
      time_left <- (self$n - self$i) * avg
      nbars <- trunc(self$i / self$n * self$width())

      cat_line(
        "|", str_rep("=", nbars),
        str_rep(" ", self$width() - nbars),
        "|", format(round(self$i / self$n * 100), width = 3), "% ",
        "~", show_time(time_left), " remaining"
      )

      invisible(self)
    }
  )
)

# Print `...` on the current line, overwriting it (leading "\r") and padding
# with spaces to the terminal width.
cat_line <- function(...) {
  msg <- paste(..., sep = "", collapse = "")
  gap <- max(c(0, getOption("width") - nchar(msg, "width")))
  cat("\r", msg, rep.int(" ", gap), sep = "")
  utils::flush.console()
}

str_rep <- function(x, i) {
  paste(rep.int(x, i), collapse = "")
}

# Format a duration in seconds as a rounded "s"/"m"/"h" string.
show_time <- function(x) {
  if (x < 60) {
    paste(round(x), "s")
  } else if (x < 60 * 60) {
    paste(round(x / 60), "m")
  } else {
    paste(round(x / (60 * 60)), "h")
  }
}

# Elapsed wall-clock seconds for this R process (third element of proc.time()).
now <- function() proc.time()[[3]]
dplyr/R/deprec-dbi.R0000644000176200001440000000714214366556340013746 0ustar liggesusers#' Source for database backends
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' These functions have been deprecated; instead please use [tbl()]
#' directly on an `DBIConnection`. See  for
#' more details.
#'
#' @param dbname Database name
#' @param host,port Host name and port number of database
#' @param user,username,password User name and password.
#'
#'   Generally, you should avoid saving username and password in your
#'   scripts as it is easy to accidentally expose valuable credentials.
#'   Instead, retrieve them from environment variables, or use database
#'   specific credential stores. For example, with MySQL you can set up `my.cnf`
#'   as described in [RMySQL::MySQL()].
#' @param ... for the src, other arguments passed on to the underlying
#'   database connector, [DBI::dbConnect()]. For the tbl, included for
#'   compatibility with the generic, but otherwise ignored.
#' @return An S3 object with class `src_dbi`, `src_sql`, `src`.
#' @keywords internal
#' @examplesIf requireNamespace("dbplyr", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)
#' con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
#' copy_to(con, mtcars)
#'
#' # To retrieve a single table from a source, use `tbl()`
#' mtcars <- con %>% tbl("mtcars")
#' mtcars
#'
#' # You can also use pass raw SQL if you want a more sophisticated query
#' con %>% tbl(sql("SELECT * FROM mtcars WHERE cyl == 8"))
#' @name src_dbi
NULL

#' @rdname src_dbi
#' @export
src_mysql <- function(dbname, host = NULL, port = 0L, username = "root",
                      password = "", ...) {
  check_dbplyr()
  check_installed("RMySQL", "to connect to MySQL/MariaDB.")
  # Deprecated since 1.0.0; always warns.
  lifecycle::deprecate_warn(
    "1.0.0",
    "dplyr::src_mysql()",
    details = "Please use `tbl()` directly with a database connection",
    always = TRUE
  )
  con <- DBI::dbConnect(
    RMySQL::MySQL(),
    dbname = dbname,
    host = host,
    port = port,
    username = username,
    password = password,
    ...
  )
  # auto_disconnect: close the connection when the src is garbage collected.
  dbplyr::src_dbi(con, auto_disconnect = TRUE)
}

#' @rdname src_dbi
#' @export
src_postgres <- function(dbname = NULL, host = NULL, port = NULL, user = NULL,
                         password = NULL, ...) {
  check_dbplyr()
  check_installed("RPostgreSQL", "to connect to PostgreSQL.")
  lifecycle::deprecate_warn(
    "1.0.0",
    "dplyr::src_postgres()",
    details = "Please use `tbl()` directly with a database connection",
    always = TRUE
  )

  # On Travis CI the default database user is "postgres".
  in_travis <- identical(Sys.getenv("TRAVIS"), "true")
  user <- user %||% if (in_travis) "postgres" else ""
  con <- DBI::dbConnect(
    RPostgreSQL::PostgreSQL(),
    host = host %||% "",
    dbname = dbname %||% "",
    user = user,
    password = password %||% "",
    port = port %||% "",
    ...
  )
  dbplyr::src_dbi(con, auto_disconnect = TRUE)
}

#' @rdname src_dbi
#' @export
#' @param path Path to SQLite database. You can use the special path
#'   ":memory:" to create a temporary in memory database.
#' @param create if `FALSE`, `path` must already exist. If
#'   `TRUE`, will create a new SQLite3 database at `path` if
#'   `path` does not exist and connect to the existing database if
#'   `path` does exist.
src_sqlite <- function(path, create = FALSE) {
  check_dbplyr()
  lifecycle::deprecate_warn(
    "1.0.0",
    "dplyr::src_sqlite()",
    details = "Please use `tbl()` directly with a database connection",
    always = TRUE
  )

  if (!create && !file.exists(path)) {
    msg <- glue("`path` must already exist, unless `create` = TRUE.")
    abort(msg)
  }

  con <- DBI::dbConnect(RSQLite::SQLite(), path)
  # Load SQLite extension functions (math functions etc.).
  RSQLite::initExtension(con)
  dbplyr::src_dbi(con, auto_disconnect = TRUE)
}
dplyr/R/dplyr.R0000644000176200001440000000130114406402754013073 0ustar liggesusers#' @description
#' To learn more about dplyr, start with the vignettes:
#' `browseVignettes(package = "dplyr")`
#' @useDynLib dplyr, .registration = TRUE
#' @keywords internal
#' @import rlang
#' @rawNamespace import(vctrs, except = data_frame)
#' @importFrom glue glue glue_collapse glue_data
#' @importFrom tibble new_tibble is_tibble
#' @importFrom stats setNames update
#' @importFrom utils head tail
#' @importFrom methods setOldClass
#' @importFrom lifecycle deprecated
#' @importFrom R6 R6Class
"_PACKAGE"

# We're importing vctrs without `data_frame()` because we
# currently
# reexport the deprecated `tibble::data_frame()` function

on_load(local_use_cli())

# Singletons
# `the` holds mutable package-global state (e.g. `the$slice_by_arg`).
the <- new_environment()
dplyr/R/rank.R0000644000176200001440000001272114366556340012702 0ustar liggesusers#' Integer ranking functions
#'
#' @description
#' Three ranking functions inspired by SQL2003. They differ primarily in how
#' they handle ties:
#'
#' * `row_number()` gives every input a unique rank, so that `c(10, 20, 20, 30)`
#'   would get ranks `c(1, 2, 3, 4)`. It's equivalent to
#'   `rank(ties.method = "first")`.
#'
#' * `min_rank()` gives every tie the same (smallest) value so that
#'   `c(10, 20, 20, 30)` gets ranks `c(1, 2, 2, 4)`. It's the way that ranks
#'   are usually computed in sports and is equivalent to
#'   `rank(ties.method = "min")`.
#'
#' * `dense_rank()` works like `min_rank()`, but doesn't leave any gaps,
#'   so that `c(10, 20, 20, 30)` gets ranks `c(1, 2, 2, 3)`.
#'
#' @param x A vector to rank
#'
#'   By default, the smallest values will get the smallest ranks. Use [desc()]
#'   to reverse the direction so the largest values get the smallest ranks.
#'
#'   Missing values will be given rank `NA`. Use `coalesce(x, Inf)` or
#'   `coalesce(x, -Inf)` if you want to treat them as the largest or smallest
#'   values respectively.
#'
#'   To rank by multiple columns at once, supply a data frame.
#' @return An integer vector.
#' @family ranking functions
#' @examples
#' x <- c(5, 1, 3, 2, 2, NA)
#' row_number(x)
#' min_rank(x)
#' dense_rank(x)
#'
#' # Ranking functions can be used in `filter()` to select top/bottom rows
#' df <- data.frame(
#'   grp = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
#'   x = c(3, 2, 1, 1, 2, 2, 1, 1, 1),
#'   y = c(1, 3, 2, 3, 2, 2, 4, 1, 2),
#'   id = 1:9
#' )
#' # Always gives exactly 1 row per group
#' df %>% group_by(grp) %>% filter(row_number(x) == 1)
#' # May give more than 1 row if ties
#' df %>% group_by(grp) %>% filter(min_rank(x) == 1)
#' # Rank by multiple columns (to break ties) by selecting them with `pick()`
#' df %>% group_by(grp) %>% filter(min_rank(pick(x, y)) == 1)
#' # See slice_min() and slice_max() for another way to tackle the same problem
#'
#' # You can use row_number() without an argument to refer to the "current"
#' # row number.
#' df %>% group_by(grp) %>% filter(row_number() == 1)
#'
#' # It's easiest to see what this does with mutate():
#' df %>% group_by(grp) %>% mutate(grp_id = row_number())
#' @export
row_number <- function(x) {
  if (missing(x)) {
    # No input: rank of the "current" row within the current group context.
    seq_len(n())
  } else {
    # "sequential" ties give every element a distinct rank; missings rank `NA`.
    vec_rank(x, ties = "sequential", incomplete = "na")
  }
}

#' @export
#' @rdname row_number
min_rank <- function(x) {
  # Ties share the smallest rank of the tie group (sports-style ranking).
  vec_rank(x, ties = "min", incomplete = "na")
}

#' @export
#' @rdname row_number
dense_rank <- function(x) {
  # Like min_rank() but with no gaps after tie groups.
  vec_rank(x, ties = "dense", incomplete = "na")
}

#' Bucket a numeric vector into `n` groups
#'
#' @description
#' `ntile()` is a sort of very rough rank, which breaks the input vector into
#' `n` buckets. If `length(x)` is not an integer multiple of `n`, the size of
#' the buckets will differ by up to one, with larger buckets coming first.
#'
#' Unlike other ranking functions, `ntile()` ignores ties: it will create
#' evenly sized buckets even if the same value of `x` ends up in different
#' buckets.
#'
#' @inheritParams row_number
#' @param n Number of groups to bucket into
#' @export
#' @family ranking functions
#' @examples
#' x <- c(5, 1, 3, 2, 2, NA)
#' ntile(x, 2)
#' ntile(x, 4)
#'
#' # If the bucket sizes are uneven, the larger buckets come first
#' ntile(1:8, 3)
#'
#' # Ties are ignored
#' ntile(rep(1, 8), 3)
ntile <- function(x = row_number(), n) {
  # Avoid recomputation in default case:
  # row_number(row_number(x)) == row_number(x)
  if (!missing(x)) {
    x <- row_number(x)
  }
  # Count of non-missing observations; missings keep rank `NA` below and are
  # excluded from the bucket arithmetic.
  len <- vec_size(x) - sum(vec_detect_missing(x))

  check_number_whole(n)
  n <- vec_cast(n, integer())
  if (n <= 0L) {
    abort("`n` must be positive.")
  }

  # Definition from
  # https://techcommunity.microsoft.com/t5/sql-server/ranking-functions-rank-dense-rank-and-ntile/ba-p/383384
  if (len == 0L) {
    rep(NA_integer_, vec_size(x))
  } else {
    # The first `n_larger` buckets hold `larger_size` elements each; the
    # remaining buckets hold `smaller_size` (the sizes differ by at most one).
    n_larger <- as.integer(len %% n)
    size <- len / n
    larger_size <- as.integer(ceiling(size))
    smaller_size <- as.integer(floor(size))

    # Ranks up to this threshold fall in the large buckets, the rest in the
    # small buckets (shifted by the `n_larger` large buckets that precede them).
    larger_threshold <- larger_size * n_larger
    bins <- if_else(
      x <= larger_threshold,
      (x + (larger_size - 1L)) / larger_size,
      (x + (- larger_threshold + smaller_size - 1L)) / smaller_size + n_larger
    )

    as.integer(floor(bins))
  }
}

#' Proportional ranking functions
#'
#' @description
#' These two ranking functions implement two slightly different ways to
#' compute a percentile. For each `x_i` in `x`:
#'
#' * `cume_dist(x)` counts the total number of values less than
#'   or equal to `x_i`, and divides it by the number of observations.
#'
#' * `percent_rank(x)` counts the total number of values less than
#'   `x_i`, and divides it by the number of observations minus 1.
#'
#' In both cases, missing values are ignored when counting the number
#' of observations.
#'
#' @inheritParams row_number
#' @returns A numeric vector containing a proportion.
#' @family ranking functions
#' @export
#' @examples
#' x <- c(5, 1, 3, 2, 2)
#'
#' cume_dist(x)
#' percent_rank(x)
#'
#' # You can understand what's going on by computing it by hand
#' sapply(x, function(xi) sum(x <= xi) / length(x))
#' sapply(x, function(xi) sum(x < xi) / (length(x) - 1))
#' # The real computations are a little more complex in order to
#' # correctly deal with missing values
percent_rank <- function(x) {
  # Shift the min-rank to start at 0, then rescale by the number of complete
  # observations minus one so the result spans [0, 1].
  n_complete <- sum(vec_detect_complete(x))
  (min_rank(x) - 1) / (n_complete - 1)
}

#' @export
#' @rdname percent_rank
cume_dist <- function(x) {
  # With "max" ties, each value's rank equals the count of values <= it;
  # dividing by the number of complete observations yields the empirical CDF.
  max_ranks <- vec_rank(x, ties = "max", incomplete = "na")
  max_ranks / sum(vec_detect_complete(x))
}
dplyr/R/arrange.R0000644000176200001440000002214114406402754013355 0ustar liggesusers#' Order rows using column values
#'
#' @description
#' `arrange()` orders the rows of a data frame by the values of selected
#' columns.
#'
#' Unlike other dplyr verbs, `arrange()` largely ignores grouping; you
#' need to explicitly mention grouping variables (or use `.by_group = TRUE`)
#' in order to group by them, and functions of variables are evaluated
#' once per data frame, not once per group.
#'
#' @details
#' ## Missing values
#' Unlike base sorting with `sort()`, `NA` are:
#' * always sorted to the end for local data, even when wrapped with `desc()`.
#' * treated differently for remote data, depending on the backend.
#'
#' @return
#' An object of the same type as `.data`. The output has the following
#' properties:
#'
#' * All rows appear in the output, but (usually) in a different place.
#' * Columns are not modified.
#' * Groups are not modified.
#' * Data frame attributes are preserved.
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("arrange")}.
#' @export #' @param .data A data frame, data frame extension (e.g. a tibble), or a #' lazy data frame (e.g. from dbplyr or dtplyr). See *Methods*, below, for #' more details. #' @param ... <[`data-masking`][rlang::args_data_masking]> Variables, or #' functions of variables. Use [desc()] to sort a variable in descending #' order. #' @param .by_group If `TRUE`, will sort first by grouping variable. Applies to #' grouped data frames only. #' @param .locale The locale to sort character vectors in. #' #' - If `NULL`, the default, uses the `"C"` locale unless the #' `dplyr.legacy_locale` global option escape hatch is active. See the #' [dplyr-locale] help page for more details. #' #' - If a single string from [stringi::stri_locale_list()] is supplied, then #' this will be used as the locale to sort with. For example, `"en"` will #' sort with the American English locale. This requires the stringi package. #' #' - If `"C"` is supplied, then character vectors will always be sorted in the #' C locale. This does not require stringi and is often much faster than #' supplying a locale identifier. #' #' The C locale is not the same as English locales, such as `"en"`, #' particularly when it comes to data containing a mix of upper and lower case #' letters. This is explained in more detail on the [locale][dplyr-locale] #' help page under the `Default locale` section. 
#' @family single table verbs
#' @examples
#' arrange(mtcars, cyl, disp)
#' arrange(mtcars, desc(disp))
#'
#' # grouped arrange ignores groups
#' by_cyl <- mtcars %>% group_by(cyl)
#' by_cyl %>% arrange(desc(wt))
#' # Unless you specifically ask:
#' by_cyl %>% arrange(desc(wt), .by_group = TRUE)
#'
#' # use embracing when wrapping in a function;
#' # see ?rlang::args_data_masking for more details
#' tidy_eval_arrange <- function(.data, var) {
#'   .data %>%
#'     arrange({{ var }})
#' }
#' tidy_eval_arrange(mtcars, mpg)
#'
#' # Use `across()` or `pick()` to select columns with tidy-select
#' iris %>% arrange(pick(starts_with("Sepal")))
#' iris %>% arrange(across(starts_with("Sepal"), desc))
arrange <- function(.data, ..., .by_group = FALSE) {
  UseMethod("arrange")
}

#' @rdname arrange
#' @export
arrange.data.frame <- function(.data, ..., .by_group = FALSE, .locale = NULL) {
  dots <- enquos(...)

  # `.by_group = TRUE` sorts by the grouping variables first, then by `...`.
  if (.by_group) {
    dots <- c(quos(!!!groups(.data)), dots)
  }

  # Compute the row permutation, then reuse the generic row-slicing machinery
  # so grouping metadata and attributes are handled consistently.
  loc <- arrange_rows(.data, dots = dots, locale = .locale)
  dplyr_row_slice(.data, loc)
}

# Helpers -----------------------------------------------------------------

# Returns the integer row order implied by the `dots` expressions.
arrange_rows <- function(data, dots, locale, error_call = caller_env()) {
  dplyr_local_error_call(error_call)

  size <- nrow(data)

  # `arrange()` implementation always uses bare data frames
  data <- new_data_frame(data, n = size)

  # Strip out calls to desc() replacing with direction argument
  is_desc_call <- function(x) {
    quo_is_call(x, "desc", ns = c("", "dplyr"))
  }
  # One direction flag ("asc"/"desc") per expression, consumed by the radix
  # sort below.
  directions <- map_chr(dots, function(dot) {
    if (is_desc_call(dot)) "desc" else "asc"
  })
  dots <- map(dots, function(dot) {
    if (is_desc_call(dot)) {
      expr <- quo_get_expr(dot)
      # `desc(x)` is a call of length 2: the function plus one argument.
      if (!has_length(expr, 2L)) {
        abort("`desc()` must be called with exactly one argument.", call = error_call)
      }
      # Unwrap: keep only the argument of desc(), in its original environment.
      dot <- new_quosure(expr[[2]], quo_get_env(dot))
    }
    dot
  })

  n_dots <- length(dots)
  seq_dots <- seq_len(n_dots)

  cols <- vector("list", length = n_dots)
  names(cols) <- as.character(seq_dots)

  for (i in seq_dots) {
    name <- vec_paste0("..", i)
    dot <- dots[[i]]
    # Evaluate each `dot` on the original data separately, rather than
    # evaluating all at once. We want to avoid the "sequential evaluation"
    # feature of `mutate()` where the 2nd expression can access results of the
    # 1st (#6495).
    elt <- mutate(data, "{name}" := !!dot, .keep = "none")
    elt <- elt[[name]]

    if (is.null(elt)) {
      # In this case, `dot` evaluated to `NULL` for "column removal" so
      # `elt[[name]]` won't exist, but we don't want to shorten `cols`.
      next
    }

    cols[[i]] <- elt
  }

  if (vec_any_missing(cols)) {
    # Drop `NULL`s that could result from `dot` evaluating to `NULL` above
    not_missing <- !vec_detect_missing(cols)
    cols <- vec_slice(cols, not_missing)
    directions <- vec_slice(directions, not_missing)
  }

  data <- new_data_frame(cols, n = size)

  if (is.null(locale) && dplyr_legacy_locale()) {
    # Temporary legacy support for respecting the system locale.
    # Only applied when `.locale` is `NULL` and `dplyr.legacy_locale` is set.
    # Matches legacy `group_by()` ordering.
    out <- dplyr_order_legacy(data = data, direction = directions)
    return(out)
  }

  # `NA` always sorts to the end locally (see the roxygen above): for a
  # descending column, treat missings as "smallest" so they still land last.
  na_values <- if_else(directions == "desc", "smallest", "largest")

  chr_proxy_collate <- locale_to_chr_proxy_collate(
    locale = locale,
    error_call = error_call
  )

  vec_order_radix(
    x = data,
    direction = directions,
    na_value = na_values,
    chr_proxy_collate = chr_proxy_collate
  )
}

# Maps the user-facing `.locale` argument to a character sort-key proxy
# function (or `NULL` for the default C-locale comparison).
locale_to_chr_proxy_collate <- function(locale,
                                        ...,
                                        has_stringi = has_minimum_stringi(),
                                        error_call = caller_env()) {
  check_dots_empty0(...)
if (is.null(locale) || is_string(locale, string = "C")) { return(NULL) } if (is_character(locale)) { if (!is_string(locale)) { abort("If `.locale` is a character vector, it must be a single string.", call = error_call) } if (!has_stringi) { abort("stringi >=1.5.3 is required to arrange in a different locale.", call = error_call) } if (!locale %in% stringi::stri_locale_list()) { abort("`.locale` must be one of the locales within `stringi::stri_locale_list()`.", call = error_call) } return(sort_key_generator(locale)) } abort("`.locale` must be a string or `NULL`.", call = error_call) } sort_key_generator <- function(locale) { function(x) { stringi::stri_sort_key(x, locale = locale) } } # ------------------------------------------------------------------------------ dplyr_order_legacy <- function(data, direction = "asc") { if (ncol(data) == 0L) { # Work around `order(!!!list())` returning `NULL` return(seq_len(nrow(data))) } proxies <- map2(data, direction, dplyr_proxy_order_legacy) proxies <- unname(proxies) inject(order(!!!proxies)) } dplyr_proxy_order_legacy <- function(x, direction) { # `order()` doesn't have a vectorized `decreasing` argument for most values of # `method` ("radix" is an exception). So we need to apply this by column ahead # of time. We have to apply `vec_proxy_order()` by column too, rather than on # the original data frame, because it flattens df-cols and we can lose track # of where to apply `direction`. x <- vec_proxy_order(x) if (is.data.frame(x)) { if (any(map_lgl(x, is.data.frame))) { abort( "All data frame columns should have been flattened by now.", .internal = TRUE ) } # Special handling for data frame proxies (either from df-cols or from # vector classes with df proxies, like rcrds), which `order()` can't handle. # We have to replace the df proxy with a single vector that orders the same # way, so we use a dense rank that utilizes the system locale. 
unique <- vec_unique(x) order <- dplyr_order_legacy(unique, direction) sorted_unique <- vec_slice(unique, order) out <- vec_match(x, sorted_unique) return(out) } if (!is_character(x) && !is_logical(x) && !is_integer(x) && !is_double(x) && !is_complex(x)) { abort("Invalid type returned by `vec_proxy_order()`.", .internal = TRUE) } if (is.object(x)) { x <- unstructure(x) } if (direction == "desc") { x <- desc(x) } x } dplyr/R/ts.R0000644000176200001440000000034014266276767012402 0ustar liggesusers#' @export filter.ts <- function(.data, ...) { bullets <- c( "Incompatible data source.", x = '`.data` is a object, not a data source.', i = "Did you want to use `stats::filter()`?" ) abort(bullets) } dplyr/R/na-if.R0000644000176200001440000000346414525503021012726 0ustar liggesusers#' Convert values to `NA` #' #' This is a translation of the SQL command `NULLIF`. It is useful if you want #' to convert an annoying value to `NA`. #' #' @param x Vector to modify #' @param y Value or vector to compare against. When `x` and `y` are equal, the #' value in `x` will be replaced with `NA`. #' #' `y` is [cast][vctrs::theory-faq-coercion] to the type of `x` before #' comparison. #' #' `y` is [recycled][vctrs::theory-faq-recycling] to the size of `x` before #' comparison. This means that `y` can be a vector with the same size as `x`, #' but most of the time this will be a single value. #' @return A modified version of `x` that replaces any values that #' are equal to `y` with `NA`. #' @seealso [coalesce()] to replace missing values with a specified #' value. #' #' [tidyr::replace_na()] to replace `NA` with a value. 
#' @export
#' @examples
#' na_if(1:5, 5:1)
#'
#' x <- c(1, -1, 0, 10)
#' 100 / x
#' 100 / na_if(x, 0)
#'
#' y <- c("abc", "def", "", "ghi")
#' na_if(y, "")
#'
#' # `na_if()` allows you to replace `NaN` with `NA`,
#' # even though `NaN == NaN` returns `NA`
#' z <- c(1, NaN, NA, 2, NaN)
#' na_if(z, NaN)
#'
#' # `na_if()` is particularly useful inside `mutate()`,
#' # and is meant for use with vectors rather than entire data frames
#' starwars %>%
#'   select(name, eye_color) %>%
#'   mutate(eye_color = na_if(eye_color, "unknown"))
#'
#' # `na_if()` can also be used with `mutate()` and `across()`
#' # to alter multiple columns
#' starwars %>%
#'    mutate(across(where(is.character), ~na_if(., "unknown")))
na_if <- function(x, y) {
  # Type and size stable on `x`
  y <- vec_cast(x = y, to = x, x_arg = "y", to_arg = "x")
  y <- vec_recycle(y, size = vec_size(x), x_arg = "y")

  # `vec_init()` gives a typed missing value, so assignment below is type
  # stable; `na_equal = TRUE` makes `NaN` match `NaN` (see examples).
  na <- vec_init(x)
  where <- vec_equal(x, y, na_equal = TRUE)

  x <- vec_assign(x, where, na)

  x
}
dplyr/R/deprec-combine.R0000644000176200001440000000161614366556340014624 0ustar liggesusers#' Combine vectors
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' `combine()` is deprecated in favour of [vctrs::vec_c()]. `combine()`
#' attempted to automatically guess whether you wanted [c()] or [unlist()],
#' but could fail in surprising ways. We now believe it's better to be explicit.
#'
#' @param ... Vectors to combine.
#' @keywords internal
#' @export
#' @examples
#' f1 <- factor("a")
#' f2 <- factor("b")
#'
#' combine(f1, f2)
#' # ->
#' vctrs::vec_c(f1, f2)
#'
#' combine(list(f1, f2))
#' # ->
#' vctrs::vec_c(!!!list(f1, f2))
combine <- function(...) {
  lifecycle::deprecate_warn("1.0.0", "combine()", "vctrs::vec_c()", always = TRUE)
  args <- list2(...)
  # A single bare list behaves like `unlist()`: combine its elements.
  if (length(args) == 1 && is.list(args[[1]])) {
    args <- args[[1]]
  }
  # `NULL` inputs are dropped, matching `c()`'s behaviour.
  args <- keep(args, function(.x) !is.null(.x))
  names(args) <- NULL
  if (length(args) == 0) {
    logical()
  } else {
    vec_c(!!!args)
  }
}
dplyr/R/colwise-arrange.R0000644000176200001440000000475614366556340015036 0ustar liggesusers#' Arrange rows by a selection of variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [pick()] or [across()] in an existing verb. See `vignette("colwise")` for
#' details.
#'
#' These [scoped] variants of [arrange()] sort a data frame by a
#' selection of variables. Like [arrange()], you can modify the
#' variables before ordering with the `.funs` argument.
#'
#' @inheritParams scoped
#' @inheritParams arrange
#'
#' @section Grouping variables:
#'
#' The grouping variables that are part of the selection participate
#' in the sorting of the data frame.
#'
#' @export
#' @keywords internal
#' @examples
#' df <- as_tibble(mtcars)
#' arrange_all(df)
#' # ->
#' arrange(df, pick(everything()))
#'
#' arrange_all(df, desc)
#' # ->
#' arrange(df, across(everything(), desc))
arrange_all <- function(.tbl, .funs = list(), ..., .by_group = FALSE, .locale = NULL) {
  lifecycle::signal_stage("superseded", "arrange_all()")
  funs <- manip_all(.tbl, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_all")
  # No transformation functions supplied: sort by all columns as-is.
  if (!length(funs)) {
    funs <- syms(tbl_vars(.tbl))
  }
  arrange(.tbl, !!!funs, .by_group = .by_group, .locale = .locale)
}

#' @rdname arrange_all
#' @export
arrange_at <- function(.tbl, .vars, .funs = list(), ..., .by_group = FALSE, .locale = NULL) {
  lifecycle::signal_stage("superseded", "arrange_at()")
  funs <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_at")
  # No transformation functions supplied: sort by the selected columns as-is.
  if (!length(funs)) {
    funs <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE)
  }
  arrange(.tbl, !!!funs, .by_group = .by_group, .locale = .locale)
}

#' @rdname arrange_all
#' @export
arrange_if <- function(.tbl, .predicate, .funs = list(), ..., .by_group = FALSE, .locale = NULL) {
  lifecycle::signal_stage("superseded", "arrange_if()")
  funs <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(), .include_group_vars = TRUE, ..., .caller = "arrange_if")
  # No transformation functions supplied: sort by the matching columns as-is.
  if (!length(funs)) {
    funs <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE)
  }
  arrange(.tbl, !!!funs, .by_group = .by_group, .locale = .locale)
}
dplyr/R/select.R0000644000176200001440000000600514366556340013224 0ustar liggesusers#' Keep or drop columns using their names and types
#'
#' @description
#'
#' Select (and optionally rename) variables in a data frame, using a concise
#' mini-language that makes it easy to refer to variables based on their name
#' (e.g. `a:f` selects all columns from `a` on the left to `f` on the
#' right) or type (e.g. `where(is.numeric)` selects all numeric columns).
#'
#' ## Overview of selection features
#'
#' ```{r, child = "man/rmd/overview.Rmd"}
#' ```
#'
#' @inheritParams arrange
#' @param ... <[`tidy-select`][dplyr_tidy_select]> One or more unquoted
#'   expressions separated by commas. Variable names can be used as if they
#'   were positions in the data frame, so expressions like `x:y` can
#'   be used to select a range of variables.
#' @return
#' An object of the same type as `.data`. The output has the following
#' properties:
#'
#' * Rows are not affected.
#' * Output columns are a subset of input columns, potentially with a different
#'   order. Columns will be renamed if `new_name = old_name` form is used.
#' * Data frame attributes are preserved.
#' * Groups are maintained; you can't select off grouping variables.
#'
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. #' #' @section Examples: #' #' ```{r, echo = FALSE, results = "asis"} #' result <- rlang::with_options( #' knitr::knit_child("man/rmd/select.Rmd"), #' tibble.print_min = 4, #' tibble.max_extra_cols = 8, #' pillar.min_title_chars = 20, #' digits = 2 #' ) #' cat(result, sep = "\n") #' ``` #' #' @family single table verbs #' @export select <- function(.data, ...) { UseMethod("select") } #' @export select.list <- function(.data, ...) { abort("`select()` doesn't handle lists.") } #' @export select.data.frame <- function(.data, ...) { error_call <- dplyr_error_call() loc <- tidyselect::eval_select( expr(c(...)), data = .data, error_call = error_call ) loc <- ensure_group_vars(loc, .data, notify = TRUE) out <- dplyr_col_select(.data, loc) out <- set_names(out, names(loc)) out } # Helpers ----------------------------------------------------------------- ensure_group_vars <- function(loc, data, notify = TRUE) { group_loc <- match(group_vars(data), names(data)) missing <- setdiff(group_loc, loc) if (length(missing) > 0) { vars <- names(data)[missing] added_group_loc <- set_names(missing, vars) # don't add grouping variables with same name as new variable (#5841) added_group_loc <- added_group_loc[! vars %in% names(loc)] if (length(added_group_loc) > 0 && notify) { inform(glue( "Adding missing grouping variables: ", paste0("`", names(added_group_loc), "`", collapse = ", ") )) } loc <- c(added_group_loc, loc) } loc } dplyr/R/funs.R0000644000176200001440000000521314366556340012720 0ustar liggesusers#' Detect where values fall in a specified range #' #' This is a shortcut for `x >= left & x <= right`, implemented for local #' vectors and translated to the appropriate SQL for remote tables. #' #' @details #' `x`, `left`, and `right` are all cast to their common type before the #' comparison is made. 
#'
#' @param x A vector
#' @param left,right Boundary values. Both `left` and `right` are recycled to
#'   the size of `x`.
#'
#' @returns
#' A logical vector the same size as `x`.
#'
#' @seealso
#' [join_by()] if you are looking for documentation for the `between()` overlap
#' join helper.
#'
#' @export
#' @examples
#' between(1:12, 7, 9)
#'
#' x <- rnorm(1e2)
#' x[between(x, -1, 1)]
#'
#' # On a tibble using `filter()`
#' filter(starwars, between(height, 100, 150))
between <- function(x, left, right) {
  args <- list(x = x, left = left, right = right)

  # Common type of all inputs
  args <- vec_cast_common(!!!args)
  x <- args$x
  args$x <- NULL

  # But recycle to size of `x`
  args <- vec_recycle_common(!!!args, .size = vec_size(x))
  left <- args$left
  right <- args$right

  # `vec_compare()` returns -1/0/1, so `>= 0L` encodes `x >= left` and
  # `<= 0L` encodes `x <= right`
  left <- vec_compare(x, left)
  left <- left >= 0L

  right <- vec_compare(x, right)
  right <- right <= 0L

  left & right
}

#' Cumulative versions of any, all, and mean
#'
#' dplyr provides `cumall()`, `cumany()`, and `cummean()` to complete R's set
#' of cumulative functions.
#'
#' @section Cumulative logical functions:
#'
#' These are particularly useful in conjunction with `filter()`:
#'
#' * `cumall(x)`: all cases until the first `FALSE`.
#' * `cumall(!x)`: all cases until the first `TRUE`.
#' * `cumany(x)`: all cases after the first `TRUE`.
#' * `cumany(!x)`: all cases after the first `FALSE`.
#'
#' @param x For `cumall()` and `cumany()`, a logical vector; for
#'   `cummean()` an integer or numeric vector.
#' @return A vector the same length as `x`.
#' @examples
#' # `cummean()` returns a numeric/integer vector of the same length
#' # as the input vector.
#' x <- c(1, 3, 5, 2, 2)
#' cummean(x)
#' cumsum(x) / seq_along(x)
#'
#' # `cumall()` and `cumany()` return logicals
#' cumall(x < 5)
#' cumany(x == 3)
#'
#' # `cumall()` vs. `cumany()`
#' df <- data.frame(
#'   date = as.Date("2020-01-01") + 0:6,
#'   balance = c(100, 50, 25, -25, -50, 30, 120)
#' )
#' # all rows after first overdraft
#' df %>% filter(cumany(balance < 0))
#' # all rows until first overdraft
#' df %>% filter(cumall(!(balance < 0)))
#'
#' @export
cumall <- function(x) {
  .Call(`dplyr_cumall`, as.logical(x))
}

#' @rdname cumall
#' @export
cumany <- function(x) {
  .Call(`dplyr_cumany`, as.logical(x))
}

#' @rdname cumall
#' @export
cummean <- function(x) {
  .Call(`dplyr_cummean`, as.numeric(x))
}
dplyr/R/deprec-do.R0000644000176200001440000001577514366556340013615 0ustar liggesusers#' Do anything
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' `do()` is superseded as of dplyr 1.0.0, because its syntax never really
#' felt like it belonged with the rest of dplyr. It's replaced by a combination
#' of [reframe()] (which can produce multiple rows and multiple columns),
#' [nest_by()] (which creates a [rowwise] tibble of nested data),
#' and [pick()] (which allows you to access the data for the "current" group).
#'
#' @param .data a tbl
#' @param ... Expressions to apply to each group. If named, results will be
#'   stored in a new column. If unnamed, must return a data frame. You can
#'   use `.` to refer to the current group. You can not mix named and
#'   unnamed arguments.
#' @keywords internal
#' @export
#' @examples
#' # do() with unnamed arguments becomes reframe() or summarise()
#' # .
becomes pick() #' by_cyl <- mtcars %>% group_by(cyl) #' by_cyl %>% do(head(., 2)) #' # -> #' by_cyl %>% reframe(head(pick(everything()), 2)) #' by_cyl %>% slice_head(n = 2) #' #' # Can refer to variables directly #' by_cyl %>% do(mean = mean(.$vs)) #' # -> #' by_cyl %>% summarise(mean = mean(vs)) #' #' # do() with named arguments becomes nest_by() + mutate() & list() #' models <- by_cyl %>% do(mod = lm(mpg ~ disp, data = .)) #' # -> #' models <- mtcars %>% #' nest_by(cyl) %>% #' mutate(mod = list(lm(mpg ~ disp, data = data))) #' models %>% summarise(rsq = summary(mod)$r.squared) #' #' # use broom to turn models into data #' models %>% do(data.frame( #' var = names(coef(.$mod)), #' coef(summary(.$mod))) #' ) #' @examplesIf requireNamespace("broom", quietly = TRUE) #' # -> #' models %>% reframe(broom::tidy(mod)) do <- function(.data, ...) { lifecycle::signal_stage("superseded", "do()") UseMethod("do") } #' @export do.NULL <- function(.data, ...) { NULL } #' @export do.grouped_df <- function(.data, ...) { index <- group_rows(.data) labels <- select(group_data(.data), -last_col()) attr(labels, ".drop") <- NULL # Create ungroup version of data frame suitable for subsetting group_data <- ungroup(.data) args <- enquos(...) named <- named_args(args) mask <- new_data_mask(new_environment()) n <- length(index) m <- length(args) # Special case for zero-group/zero-row input if (n == 0) { if (named) { out <- rep_len(list(list()), length(args)) out <- set_names(out, names(args)) out <- label_output_list(labels, out, groups(.data)) } else { env_bind_do_pronouns(mask, group_data) out <- eval_tidy(args[[1]], mask) out <- out[0, , drop = FALSE] out <- label_output_dataframe(labels, list(list(out)), group_vars(.data), group_by_drop_default(.data)) } return(out) } # Add pronouns with active bindings that resolve to the current # subset. `_i` is found in environment of this function because of # usual scoping rules. 
group_slice <- function(value) { if (missing(value)) { group_data[index[[`_i`]], , drop = FALSE] } else { group_data[index[[`_i`]], ] <<- value } } env_bind_do_pronouns(mask, group_slice) out <- replicate(m, vector("list", n), simplify = FALSE) names(out) <- names(args) p <- Progress$new(n * m, min_time = 2) for (`_i` in seq_len(n)) { for (j in seq_len(m)) { out[[j]][`_i`] <- list(eval_tidy(args[[j]], mask)) p$tick()$print() } } if (!named) { label_output_dataframe(labels, out, group_vars(.data), group_by_drop_default(.data)) } else { label_output_list(labels, out, group_vars(.data)) } } #' @export do.data.frame <- function(.data, ...) { args <- enquos(...) named <- named_args(args) # Create custom data mask with `.` pronoun mask <- new_data_mask(new_environment()) env_bind_do_pronouns(mask, .data) if (!named) { out <- eval_tidy(args[[1]], mask) if (!inherits(out, "data.frame")) { msg <- glue("Result must be a data frame, not {fmt_classes(out)}.") abort(msg) } } else { out <- map(args, function(arg) list(eval_tidy(arg, mask))) names(out) <- names(args) out <- tibble::as_tibble(out, .name_repair = "minimal") } out } # Helper functions ------------------------------------------------------------- env_bind_do_pronouns <- function(env, data) { if (is_function(data)) { bind <- env_bind_active } else { bind <- env_bind } # Use `:=` for `.` to avoid partial matching with `.env` bind(env, "." 
:= data, .data = data) } label_output_dataframe <- function(labels, out, groups, .drop, error_call = caller_env()) { data_frame <- vapply(out[[1]], is.data.frame, logical(1)) if (any(!data_frame)) { msg <- glue( "Results {bad} must be data frames, not {first_bad_class}.", bad = fmt_comma(which(!data_frame)), first_bad_class = fmt_classes(out[[1]][[which.min(data_frame)]]) ) abort(msg, call = error_call) } rows <- vapply(out[[1]], nrow, numeric(1)) out <- bind_rows(out[[1]]) if (!is.null(labels)) { # Remove any common columns from labels labels <- labels[setdiff(names(labels), names(out))] # Repeat each row to match data labels <- labels[rep(seq_len(nrow(labels)), rows), , drop = FALSE] rownames(labels) <- NULL grouped_df(bind_cols(labels, out), groups, .drop) } else { rowwise(out) } } label_output_list <- function(labels, out, groups) { if (!is.null(labels)) { labels[names(out)] <- out rowwise(labels) } else { class(out) <- "data.frame" attr(out, "row.names") <- .set_row_names(length(out[[1]])) rowwise(out) } } named_args <- function(args, error_call = caller_env()) { # Arguments must either be all named or all unnamed. named <- sum(names2(args) != "") if (!(named == 0 || named == length(args))) { msg <- "Arguments must either be all named or all unnamed." abort(msg, call = error_call) } if (named == 0 && length(args) > 1) { msg <- glue("Can only supply one unnamed argument, not {length(args)}.") abort(msg, call = error_call) } named != 0 } #' @export do.rowwise_df <- function(.data, ...) { # Create ungroup version of data frame suitable for subsetting group_data <- ungroup(.data) args <- enquos(...) named <- named_args(args) # Create new environment, inheriting from parent, with an active binding # for . that resolves to the current subset. `_i` is found in environment # of this function because of usual scoping rules. 
mask <- new_data_mask(new_environment()) current_row <- function() lapply(group_data[`_i`, , drop = FALSE], "[[", 1) env_bind_do_pronouns(mask, current_row) n <- nrow(.data) m <- length(args) out <- replicate(m, vector("list", n), simplify = FALSE) names(out) <- names(args) p <- rlang::with_options( lifecycle_verbosity = "quiet", progress_estimated(n * m, min_time = 2) ) for (`_i` in seq_len(n)) { for (j in seq_len(m)) { out[[j]][`_i`] <- list(eval_tidy(args[[j]], mask)) p$tick()$print() } } if (!named) { label_output_dataframe(NULL, out, groups(.data), group_by_drop_default(.data)) } else { label_output_list(NULL, out, groups(.data)) } } dplyr/R/distinct.R0000644000176200001440000001065614406402754013567 0ustar liggesusers#' Keep distinct/unique rows #' #' Keep only unique/distinct rows from a data frame. This is similar #' to [unique.data.frame()] but considerably faster. #' #' @inheritParams arrange #' @param ... <[`data-masking`][rlang::args_data_masking]> Optional variables to #' use when determining uniqueness. If there are multiple rows for a given #' combination of inputs, only the first row will be preserved. If omitted, #' will use all variables in the data frame. #' @param .keep_all If `TRUE`, keep all variables in `.data`. #' If a combination of `...` is not distinct, this keeps the #' first row of values. #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Rows are a subset of the input but appear in the same order. #' * Columns are not modified if `...` is empty or `.keep_all` is `TRUE`. #' Otherwise, `distinct()` first calls `mutate()` to create new columns. #' * Groups are not modified. #' * Data frame attributes are preserved. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. 
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}.
#' @export
#' @examples
#' df <- tibble(
#'   x = sample(10, 100, rep = TRUE),
#'   y = sample(10, 100, rep = TRUE)
#' )
#' nrow(df)
#' nrow(distinct(df))
#' nrow(distinct(df, x, y))
#'
#' distinct(df, x)
#' distinct(df, y)
#'
#' # You can choose to keep all other variables as well
#' distinct(df, x, .keep_all = TRUE)
#' distinct(df, y, .keep_all = TRUE)
#'
#' # You can also use distinct on computed variables
#' distinct(df, diff = abs(x - y))
#'
#' # Use `pick()` to select columns with tidy-select
#' distinct(starwars, pick(contains("color")))
#'
#' # Grouping -------------------------------------------------
#'
#' df <- tibble(
#'   g = c(1, 1, 2, 2, 2),
#'   x = c(1, 1, 2, 1, 2),
#'   y = c(3, 2, 1, 3, 1)
#' )
#' df <- df %>% group_by(g)
#'
#' # With grouped data frames, distinctness is computed within each group
#' df %>% distinct(x)
#'
#' # When `...` are omitted, `distinct()` still computes distinctness using
#' # all variables in the data frame
#' df %>% distinct()
distinct <- function(.data, ..., .keep_all = FALSE) {
  UseMethod("distinct")
}

#' Same basic philosophy as group_by_prepare(): lazy_dots comes in, list of data and
#' vars (character vector) comes out.
#' @rdname group_by_prepare
#' @export
distinct_prepare <- function(.data,
                             vars,
                             group_vars = character(),
                             .keep_all = FALSE,
                             caller_env = caller_env(2),
                             error_call = caller_env()) {
  stopifnot(is_quosures(vars), is.character(group_vars))

  # If no input, keep all variables
  if (length(vars) == 0) {
    return(list(
      data = .data,
      vars = seq_along(.data),
      keep = seq_along(.data)
    ))
  }

  # If any calls, use mutate to add new columns, then distinct on those
  computed_columns <- add_computed_columns(.data, vars, error_call = error_call)

  .data <- computed_columns$data
  distinct_vars <- computed_columns$added_names

  # Once we've done the mutate, we no longer need lazy objects, and
  # can instead just use their names
  missing_vars <- setdiff(distinct_vars, names(.data))
  if (length(missing_vars) > 0) {
    # Error: a requested distinct variable neither exists in `.data` nor was
    # produced by the mutate step above
    bullets <- c(
      "Must use existing variables.",
      set_names(glue("`{missing_vars}` not found in `.data`."), rep("x", length(missing_vars)))
    )
    abort(bullets, call = error_call)
  }

  # Only keep unique vars
  distinct_vars <- unique(distinct_vars)

  # Missing grouping variables are added to the front
  new_vars <- c(setdiff(group_vars, distinct_vars), distinct_vars)

  # `keep` controls which columns appear in the result: all of them when
  # `.keep_all = TRUE`, otherwise only the distinct-defining columns
  if (.keep_all) {
    keep <- seq_along(.data)
  } else {
    keep <- new_vars
  }

  list(data = .data, vars = new_vars, keep = keep)
}

#' @export
distinct.data.frame <- function(.data, ..., .keep_all = FALSE) {
  prep <- distinct_prepare(
    .data,
    vars = enquos(...),
    group_vars = group_vars(.data),
    .keep_all = .keep_all,
    caller_env = caller_env()
  )

  out <- prep$data

  # Find the location of the first occurrence of each unique combination of
  # the distinct-defining columns
  cols <- dplyr_col_select(out, prep$vars)
  loc <- vec_unique_loc(cols)

  # First restrict the columns, then slice out the unique rows (preserving
  # their original order)
  out <- dplyr_col_select(out, prep$keep)
  dplyr_row_slice(out, loc)
}
dplyr/R/transmute.R0000644000176200001440000000516714366556340013773 0ustar liggesusers#' Create, modify, and delete columns
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' `transmute()` creates a new data frame containing only the specified
#' computations. It's superseded because you can perform the same job
#' with `mutate(.keep = "none")`.
#' #' @inheritParams mutate #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' Methods available in currently loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("transmute")}. #' @returns An object of the same type as `.data`. The output has the following #' properties: #' #' * Columns created or modified through `...` will be returned in the order #' specified by `...`. #' * Unmodified grouping columns will be placed at the front. #' * The number of rows is not affected. #' * Columns given the value `NULL` will be removed. #' * Groups will be recomputed if a grouping variable is mutated. #' * Data frame attributes are preserved. #' @keywords internal #' @export transmute <- function(.data, ...) { # dplyr 1.1.0 lifecycle::signal_stage("superseded", "transmute()", I("mutate(.keep = 'none')")) UseMethod("transmute") } #' @export transmute.data.frame <- function(.data, ...) { dots <- check_transmute_args(...) dots <- dplyr_quosures(!!!dots) # We don't expose `.by` because `transmute()` is superseded by <- compute_by(by = NULL, data = .data) cols <- mutate_cols(.data, dots, by) out <- dplyr_col_modify(.data, cols) # Compact out `NULL` columns that got removed. # These won't exist in `out`, but we don't want them to look "new". # Note that `dplyr_col_modify()` makes it impossible to `NULL` a group column, # which we rely on below. 
cols <- compact_null(cols) # Retain expression columns in order of their appearance cols_expr <- names(cols) # Retain untouched group variables up front cols_group <- by$names cols_group <- setdiff(cols_group, cols_expr) cols_retain <- c(cols_group, cols_expr) dplyr_col_select(out, cols_retain) } # helpers ----------------------------------------------------------------- check_transmute_args <- function(..., .keep, .before, .after, error_call = caller_env()) { if (!missing(.keep)) { abort("The `.keep` argument is not supported.", call = error_call) } if (!missing(.before)) { abort("The `.before` argument is not supported.", call = error_call) } if (!missing(.after)) { abort("The `.after` argument is not supported.", call = error_call) } enquos(...) } dplyr/R/utils-tidy-eval.R0000644000176200001440000000274214406415372014777 0ustar liggesusers#' Other tidy eval tools #' #' @description #' These tidy eval functions are no longer for normal usage, but are still #' exported from dplyr for backward compatibility. #' See [`?rlang::args_data_masking`][rlang::args_data_masking] and #' `vignette("programming")` for the latest recommendations. 
#' #' * [expr()][rlang::expr] #' * [enquo()][rlang::enquo] #' * [enquos()][rlang::enquos] #' * [sym()][rlang::sym] #' * [syms()][rlang::syms] #' * [as_label()][rlang::as_label] #' * [quo()][rlang::quo] #' * [quos()][rlang::quos] #' * [quo_name()][rlang::quo_name] #' * [ensym()][rlang::ensym] #' * [ensyms()][rlang::ensyms] #' * [enexpr()][rlang::enexpr] #' * [enexprs()][rlang::enexprs] #' #' @keywords internal #' @name tidyeval-compat #' @aliases .data expr enquo enquos sym syms as_label #' @export .data expr enquo enquos sym syms as_label #' @aliases quo quos quo_name ensym ensyms enexpr enexprs #' @export quo quos quo_name ensym ensyms enexpr enexprs NULL # Retaining a redirect for the old `dplyr_data_masking` help page, because many # package authors end up linking to this through inherited documentation, and # removing the topic from here results in a check warning in their package. It # should be possible to remove this once enough packages have re-documented with # dplyr 1.1.1 installed and sent a new release to CRAN. #' Data-masking #' #' This page is now located at #' [`?rlang::args_data_masking`][rlang::args_data_masking]. #' #' @keywords internal #' @name dplyr_data_masking NULL dplyr/R/data-bands.R0000644000176200001440000000141313663216626013740 0ustar liggesusers#' Band membership #' #' These data sets describe band members of the Beatles and Rolling Stones. They #' are toy data sets that can be displayed in their entirety on a slide (e.g. to #' demonstrate a join). #' #' `band_instruments` and `band_instruments2` contain the same data but use #' different column names for the first column of the data set. #' `band_instruments` uses `name`, which matches the name of the key column of #' `band_members`; `band_instruments2` uses `artist`, which does not. 
#' #' @format Each is a tibble with two variables and three observations #' @examples #' band_members #' band_instruments #' band_instruments2 "band_members" #' @rdname band_members #' @format NULL "band_instruments" #' @rdname band_members #' @format NULL "band_instruments2" dplyr/R/context.R0000644000176200001440000001064314406402754013426 0ustar liggesusers#' Information about the "current" group or variable #' #' @description #' These functions return information about the "current" group or "current" #' variable, so only work inside specific contexts like [summarise()] and #' [mutate()]. #' #' * `n()` gives the current group size. #' * `cur_group()` gives the group keys, a tibble with one row and one column #' for each grouping variable. #' * `cur_group_id()` gives a unique numeric identifier for the current group. #' * `cur_group_rows()` gives the row indices for the current group. #' * `cur_column()` gives the name of the current column (in [across()] only). #' #' See [group_data()] for equivalent functions that return values for all #' groups. #' #' See [pick()] for a way to select a subset of columns using tidyselect syntax #' while inside `summarise()` or `mutate()`. #' #' @section data.table: #' If you're familiar with data.table: #' #' * `cur_group_id()` <-> `.GRP` #' * `cur_group()` <-> `.BY` #' * `cur_group_rows()` <-> `.I` #' #' See [pick()] for an equivalent to `.SD`. 
#'
#' @name context
#' @examples
#' df <- tibble(
#'   g = sample(rep(letters[1:3], 1:3)),
#'   x = runif(6),
#'   y = runif(6)
#' )
#' gf <- df %>% group_by(g)
#'
#' gf %>% summarise(n = n())
#'
#' gf %>% mutate(id = cur_group_id())
#' gf %>% reframe(row = cur_group_rows())
#' gf %>% summarise(data = list(cur_group()))
#'
#' gf %>% mutate(across(everything(), ~ paste(cur_column(), round(.x, 2))))
NULL

#' @rdname context
#' @export
n <- function() {
  peek_mask()$get_current_group_size()
}

#' @rdname context
#' @export
cur_group <- function() {
  peek_mask()$current_key()
}

#' @rdname context
#' @export
cur_group_id <- function() {
  peek_mask()$get_current_group_id()
}

#' @rdname context
#' @export
cur_group_rows <- function() {
  peek_mask()$current_rows()
}

# Format the current group's key as `name = value` pairs for error messages
group_labels_details <- function(keys) {
  keys <- map_chr(keys, pillar::format_glimpse)
  labels <- vec_paste0(names(keys), " = ", keys)
  labels <- cli_collapse(labels, last = ", ")
  cli::format_inline("{.code {labels}}")
}

# Human-readable label for the current group, used when reporting errors:
# empty for ungrouped data, "group <id>: <keys>" when grouped, "row <id>" for
# rowwise data
cur_group_label <- function(type = mask_type(),
                            id = cur_group_id(),
                            group = cur_group()) {
  switch(
    type,
    ungrouped = "",
    grouped = glue("group {id}: {label}", label = group_labels_details(group)),
    rowwise = glue("row {id}"),
    stop_mask_type(type)
  )
}

# Collect the pieces of group information relevant for the given mask type
cur_group_data <- function(mask_type) {
  switch(
    mask_type,
    ungrouped = list(),
    grouped = list(id = cur_group_id(), group = cur_group()),
    rowwise = list(id = cur_group_id()),
    stop_mask_type(mask_type)
  )
}

# Internal error: the mask type should always be one of the three handled above
stop_mask_type <- function(type) {
  cli::cli_abort(
    "Unexpected mask type {.val {type}}.",
    .internal = TRUE
  )
}

# Bundle a captured condition together with its evaluation context (expression
# name/quosure, mask type, and — when available — the current group data) for
# later error reporting
cnd_data <- function(cnd, ctxt, mask, call) {
  mask_type <- mask_type(mask)
  has_group_data <- has_active_group_context(mask)

  if (has_group_data) {
    group_data <- cur_group_data(mask_type)
  } else {
    group_data <- NULL
  }

  list(
    cnd = cnd,
    name = ctxt$error_name,
    quo = ctxt$error_quo,
    type = mask_type,
    has_group_data = has_group_data,
    group_data = group_data,
    call = call
  )
}

#' @rdname context
#' @export
cur_column <- function() {
  peek_column()
}

# context accessors -------------------------------------------------------

# Shared environment holding the "current" column and mask, keyed by name
context_env <- new_environment()

# Store `value` under `name`, returning the previous value so the caller can
# restore it later
context_poke <- function(name, value) {
  old <- context_env[[name]]
  context_env[[name]] <- value
  old
}

# Read the current value for `name`, or `NULL` if none is set
context_peek_bare <- function(name) {
  context_env[[name]]
}

# Like `context_peek_bare()`, but errors with a user-facing message when the
# context is not set (i.e. the function was called outside `location`)
context_peek <- function(name, location, call = caller_env()) {
  context_peek_bare(name) %||%
    abort(glue("Must only be used inside {location}."), call = call)
}

# Set a context value for the duration of `frame`, restoring the previous
# value when `frame` exits
context_local <- function(name, value, frame = caller_env()) {
  old <- context_poke(name, value)

  # FIXME: Pass `after = TRUE` once we depend on R 3.5. Currently this
  # doesn't restore in the correct order (FIFO) when context-local
  # functions are called multiple times within the same frame.
  expr <- expr(on.exit(context_poke(!!name, !!old), add = TRUE))
  eval_bare(expr, frame)

  value
}

peek_column <- function(call = caller_env()) {
  context_peek("column", "`across()`", call)
}

local_column <- function(x, frame = caller_env()) {
  context_local("column", x, frame = frame)
}

peek_mask <- function(call = caller_env()) {
  context_peek("mask", "data-masking verbs like `mutate()`, `filter()`, and `group_by()`", call)
}

local_mask <- function(x, frame = caller_env()) {
  context_local("mask", x, frame = frame)
}
dplyr/R/deprec-src-local.R0000644000176200001440000000415614366556340015071 0ustar liggesusers#' A local source
#'
#' `r lifecycle::badge("deprecated")`
#' This function was deprecated since it existed to support a style of testing
#' dplyr backends that turned out not to be useful.
#'
#' @param tbl name of the function used to generate `tbl` objects
#' @param pkg,env Either the name of a package or an environment object in
#' which to look for objects.
#' @keywords internal #' @export src_local <- function(tbl, pkg = NULL, env = NULL) { lifecycle::deprecate_warn("1.0.0", "src_local()", always = TRUE) if (!xor(is.null(pkg), is.null(env))) { msg <- glue("Exactly one of `pkg` and `env` must be non-NULL, not {(!is.null(pkg)) + (!is.null(env))}.") abort(msg) } if (!is.null(pkg)) { env <- getNamespaceInfo(pkg, "lazydata") name <- paste0("") } else { stopifnot(is.environment(env)) name <- utils::capture.output(print(env)) } structure( list(tbl_f = match.fun(tbl), name = name, env = env), class = c("src_local", "src") ) } #' @rdname src_local #' @export src_df <- function(pkg = NULL, env = NULL) { src_local("as_tibble", pkg, env) } #' @export src_tbls.src_local <- function(x, ...) { objs <- ls(envir = x$env, all.names = TRUE) Filter(function(obj) is.data.frame(get(obj, envir = x$env)), objs) } #' @export tbl.src_local <- function(src, from, ...) { src$tbl_f(get(from, src$env)) } #' @export copy_to.src_local <- function(dest, df, name = deparse(substitute(df)), overwrite = FALSE, ...) { if (!overwrite && exists(name, envir = dest$env, inherits = FALSE)) { msg <- glue("Object with `name` = {fmt_obj(name)} must not already exist, unless `overwrite` = TRUE.") abort(msg) } assign(name, envir = dest$env, df) tbl(dest, name) } #' @export format.src_local <- function(x, ...) 
{ paste0( "src: ", x$name, "\n", wrap("tbls: ", paste0(sort(src_tbls(x)), collapse = ", ")) ) } wrap <- function(..., indent = 0) { x <- paste0(..., collapse = "") wrapped <- strwrap( x, indent = indent, exdent = indent + 2, width = getOption("width") ) paste0(wrapped, collapse = "\n") } dplyr/R/group-nest.R0000644000176200001440000000500214366556340014044 0ustar liggesusers group_nest_impl <- function(.tbl, .key, keep = FALSE){ mutate(group_keys(.tbl), !!.key := group_split(.tbl, .keep = keep)) } #' Nest a tibble using a grouping specification #' #' @description #' `r lifecycle::badge("experimental")` #' #' Nest a tibble using a grouping specification #' #' @section Lifecycle: #' `group_nest()` is not stable because [`tidyr::nest(.by =)`][tidyr::nest()] #' provides very similar behavior. It may be deprecated in the future. #' #' @section Grouped data frames: #' #' The primary use case for [group_nest()] is with already grouped data frames, #' typically a result of [group_by()]. In this case [group_nest()] only uses #' the first argument, the grouped tibble, and warns when `...` is used. #' #' @section Ungrouped data frames: #' #' When used on ungrouped data frames, [group_nest()] forwards the `...` to #' [group_by()] before nesting, therefore the `...` are subject to the data mask. #' #' @param .tbl A tbl #' @param ... Grouping specification, forwarded to [group_by()] #' @param .key the name of the list column #' @param keep Should the grouping columns be kept in the list column. #' @return A tbl with one row per unique combination of the grouping variables. #' The first columns are the grouping variables, followed by a list column of tibbles #' with matching rows of the remaining columns. 
#' @keywords internal #' @family grouping functions #' @export #' @examples #' #' #----- use case 1: a grouped data frame #' iris %>% #' group_by(Species) %>% #' group_nest() #' #' # this can be useful if the grouped data has been altered before nesting #' iris %>% #' group_by(Species) %>% #' filter(Sepal.Length > mean(Sepal.Length)) %>% #' group_nest() #' #' #----- use case 2: using group_nest() on a ungrouped data frame with #' # a grouping specification that uses the data mask #' starwars %>% #' group_nest(species, homeworld) group_nest <- function(.tbl, ..., .key = "data", keep = FALSE){ lifecycle::signal_stage("experimental", "group_nest()") UseMethod("group_nest") } #' @export group_nest.data.frame <- function(.tbl, ..., .key = "data", keep = FALSE) { if (dots_n(...)) { group_nest_impl(group_by(.tbl, ...), .key = .key, keep = keep) } else { tibble(!!.key := list(.tbl)) } } #' @export group_nest.grouped_df <- function(.tbl, ..., .key = "data", keep = FALSE) { if (dots_n(...)) { warn("... is ignored in group_nest(), please use group_by(..., .add = TRUE) %>% group_nest()") } group_nest_impl(.tbl, .key = .key, keep = keep) } dplyr/R/src.R0000644000176200001440000000265214366556340012540 0ustar liggesusers#' Create a "src" object #' #' `src()` is the standard constructor for srcs and `is.src()` tests. #' #' @keywords internal #' @export #' @param subclass name of subclass. "src" is an abstract base class, so you #' must supply this value. `src_` is automatically prepended to the #' class name #' @param ... fields used by object. #' #' These dots are evaluated with [explicit splicing][rlang::dots_list]. #' @param x object to test for "src"-ness. src <- function(subclass, ...) { subclass <- paste0("src_", subclass) structure(dots_list(...), class = c(subclass, "src")) } #' @rdname src #' @export is.src <- function(x) inherits(x, "src") #' @export print.src <- function(x, ...) { cat(format(x, ...), "\n", sep = "") } #' List all tbls provided by a source. 
#'
#' This is a generic method which individual src's will provide methods for.
#' Most methods will not be documented because it's usually pretty obvious what
#' possible results will be.
#'
#' @param x a data src.
#' @param ... other arguments passed on to the individual methods.
#' @export
#' @keywords internal
src_tbls <- function(x, ...) {
  UseMethod("src_tbls")
}

#' Figure out if two sources are the same (or two tbl have the same source)
#'
#' @param x,y src or tbls to test
#' @return a logical flag
#' @export
#' @keywords internal
same_src <- function(x, y) {
  UseMethod("same_src")
}

#' @export
same_src.data.frame <- function(x, y) {
  # Two in-memory frames always share the "local" source.
  inherits(y, "data.frame")
}
dplyr/R/src-dbi.R0000644000176200001440000000070014366556340013264 0ustar liggesusers#' @export
tbl.DBIConnection <- function(src, from, ...) {
  # Wrap the raw DBI connection in a dbplyr src before dispatching;
  # `auto_disconnect = FALSE` because dplyr does not own the connection.
  check_dbplyr()
  src_wrapped <- dbplyr::src_dbi(src, auto_disconnect = FALSE)
  tbl(src_wrapped, from = from, ...)
}

#' @export
copy_to.DBIConnection <- function(dest, df, name = deparse(substitute(df)),
                                  overwrite = FALSE, ...) {
  # Same wrapping strategy as tbl.DBIConnection(): dplyr never takes
  # ownership of a user-supplied DBI connection.
  check_dbplyr()
  dest_wrapped <- dbplyr::src_dbi(dest, auto_disconnect = FALSE)
  copy_to(
    dest_wrapped,
    df = df,
    name = name,
    overwrite = overwrite,
    ...
  )
}
dplyr/R/colwise-distinct.R0000644000176200001440000000511114366556340015226 0ustar liggesusers#' Select distinct rows by a selection of variables
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Scoped verbs (`_if`, `_at`, `_all`) have been superseded by the use of
#' [pick()] or [across()] in an existing verb. See `vignette("colwise")` for
#' details.
#'
#' These [scoped] variants of [distinct()] extract distinct rows by a
#' selection of variables. Like `distinct()`, you can modify the
#' variables before ordering with the `.funs` argument.
#'
#' @param .keep_all If `TRUE`, keep all variables in `.data`.
#'   If a combination of `...` is not distinct, this keeps the
#'   first row of values.
#' @inheritParams scoped
#' @export
#'
#' @section Grouping variables:
#'
#' The grouping variables that are part of the selection are taken
#' into account to determine distinct rows.
#'
#' @keywords internal
#' @examples
#' df <- tibble(x = rep(2:5, each = 2) / 2, y = rep(2:3, each = 4) / 2)
#'
#' distinct_all(df)
#' # ->
#' distinct(df, pick(everything()))
#'
#' distinct_at(df, vars(x,y))
#' # ->
#' distinct(df, pick(x, y))
#'
#' distinct_if(df, is.numeric)
#' # ->
#' distinct(df, pick(where(is.numeric)))
#'
#' # You can supply a function that will be applied before extracting the distinct values
#' # The variables of the sorted tibble keep their original values.
#' distinct_all(df, round)
#' # ->
#' distinct(df, across(everything(), round))
distinct_all <- function(.tbl, .funs = list(), ..., .keep_all = FALSE) {
  lifecycle::signal_stage("superseded", "distinct_all()")
  # Resolve the transformation functions over every column; when no
  # transformation was supplied, fall back to the bare column symbols.
  fun_list <- manip_all(.tbl, .funs, enquo(.funs), caller_env(),
    .include_group_vars = TRUE, ..., .caller = "distinct_all")
  if (length(fun_list) == 0L) {
    fun_list <- syms(tbl_vars(.tbl))
  }
  distinct(.tbl, !!!fun_list, .keep_all = .keep_all)
}

#' @rdname distinct_all
#' @export
distinct_at <- function(.tbl, .vars, .funs = list(), ..., .keep_all = FALSE) {
  lifecycle::signal_stage("superseded", "distinct_at()")
  # Same strategy as distinct_all(), restricted to the `.vars` selection.
  fun_list <- manip_at(.tbl, .vars, .funs, enquo(.funs), caller_env(),
    .include_group_vars = TRUE, ..., .caller = "distinct_at")
  if (length(fun_list) == 0L) {
    fun_list <- tbl_at_syms(.tbl, .vars, .include_group_vars = TRUE)
  }
  distinct(.tbl, !!!fun_list, .keep_all = .keep_all)
}

#' @rdname distinct_all
#' @export
distinct_if <- function(.tbl, .predicate, .funs = list(), ..., .keep_all = FALSE) {
  lifecycle::signal_stage("superseded", "distinct_if()")
  # Same strategy again, with columns chosen by `.predicate`.
  fun_list <- manip_if(.tbl, .predicate, .funs, enquo(.funs), caller_env(),
    .include_group_vars = TRUE, ..., .caller = "distinct_if")
  if (length(fun_list) == 0L) {
    fun_list <- tbl_if_syms(.tbl, .predicate, .include_group_vars = TRUE)
  }
  distinct(.tbl, !!!fun_list, .keep_all = .keep_all)
}
dplyr/R/data-mask.R0000644000176200001440000002255214406402754013606 0ustar liggesusersDataMask <- R6Class("DataMask", public = list( initialize = function(data, by, verb, error_call) { rows <- by$data$.rows if (length(rows) == 0) { # Specially handle case of zero groups rows <- new_list_of(list(integer()), ptype = integer()) } private$rows <- rows frame <- caller_env(n = 2) local_mask(self, frame) names_bindings <- chr_unserialise_unicode(names2(data)) if (any(names_bindings == "")) { # `names2()` converted potential `NA` names to `""` already abort("Can't transform a data frame with `NA` or `\"\"` names.", call = error_call) } if (anyDuplicated(names_bindings)) { abort("Can't transform a data frame with duplicate names.", call = error_call) } names(data) <- names_bindings private$size <- nrow(data) private$current_data <- dplyr_new_list(data) private$grouped <- by$type == "grouped" private$rowwise <- by$type == "rowwise" # `duplicate(0L)` is necessary to ensure that the value we modify by # reference is "fresh" and completely owned by this instance of the # `DataMask`. Otherwise nested `mutate()` calls can end up modifying # the same value (#6762). 
private$env_current_group_info <- new_environment(data = list( `dplyr:::current_group_id` = duplicate(0L), `dplyr:::current_group_size` = duplicate(0L) )) private$chops <- .Call( dplyr_lazy_vec_chop_impl, data, rows, private$env_current_group_info, private$grouped, private$rowwise ) private$env_mask_bindings <- .Call( dplyr_make_mask_bindings, private$chops, data ) private$keys <- group_keys0(by$data) private$by_names <- by$names private$verb <- verb }, add_one = function(name, chunks, result) { if (self$is_rowwise()){ is_scalar_list <- function(.x) { obj_is_list(.x) && length(.x) == 1L } if (all(map_lgl(chunks, is_scalar_list))) { chunks <- map(chunks, `[[`, 1L) } } .Call(`dplyr_mask_binding_add`, private, name, result, chunks) }, remove = function(name) { .Call(`dplyr_mask_binding_remove`, private, name) }, resolve = function(name) { private$chops[[name]] }, eval_all = function(quo) { .Call(`dplyr_mask_eval_all`, quo, private) }, eval_all_summarise = function(quo) { # Wrap in a function called `eval()` so that rlang ignores the # call in error messages. This only concerns errors that occur # directly in `quo`. eval <- function() .Call(`dplyr_mask_eval_all_summarise`, quo, private) eval() }, eval_all_mutate = function(quo) { eval <- function() .Call(`dplyr_mask_eval_all_mutate`, quo, private) eval() }, eval_all_filter = function(quos, env_filter) { eval <- function() .Call(`dplyr_mask_eval_all_filter`, quos, private, private$size, env_filter) eval() }, pick_current = function(vars) { # Only used for deprecated `cur_data()`, `cur_data_all()`, and # `across(.fns = NULL)`. We should remove this when we defunct those. 
cols <- self$current_cols(vars) if (self$is_rowwise()) { cols <- map2(cols, names(cols), function(col, name) { if (obj_is_list(private$current_data[[name]])) { col <- list(col) } col }) } dplyr_new_tibble(cols, size = self$get_current_group_size_mutable()) }, current_cols = function(vars) { env_get_list(private$env_mask_bindings, vars) }, current_rows = function() { private$rows[[self$get_current_group_id_mutable()]] }, current_key = function() { keys <- private$keys if (vec_size(keys) == 0L) { # Specially handle case of zero groups, like in `$initialize()`. # We always evaluate at least 1 group, so the slice call would attempt # to do `vec_slice(<0-row-df>, 1L)`, which is an error. keys } else { vec_slice(keys, self$get_current_group_id_mutable()) } }, current_vars = function() { names(private$current_data) }, current_non_group_vars = function() { setdiff(self$current_vars(), private$by_names) }, # This pair of functions provides access to `dplyr:::current_group_id`. # - `dplyr:::current_group_id` is modified by reference at the C level. # - If you access it ephemerally, the mutable version can be used. # - If you access it persistently, like in `cur_group_id()`, it must be # duplicated on the way out. # - For maximal performance, we inline the mutable function definition into # the non-mutable version. get_current_group_id = function() { duplicate(private[["env_current_group_info"]][["dplyr:::current_group_id"]]) }, get_current_group_id_mutable = function() { private[["env_current_group_info"]][["dplyr:::current_group_id"]] }, # This pair of functions provides access to `dplyr:::current_group_size`. # - `dplyr:::current_group_size` is modified by reference at the C level. # - If you access it ephemerally, the mutable version can be used. # - If you access it persistently, like in `n()`, it must be duplicated on # the way out. # - For maximal performance, we inline the mutable function definition into # the non-mutable version. 
get_current_group_size = function() { duplicate(private[["env_current_group_info"]][["dplyr:::current_group_size"]]) }, get_current_group_size_mutable = function() { private[["env_current_group_info"]][["dplyr:::current_group_size"]] }, set_current_group = function(group) { # Only to be used right before throwing an error. # We `duplicate()` both values to be extremely conservative, because there # is an extremely small chance we could modify this by reference and cause # issues with the `group` variable in the caller, but this has never been # seen. We generally assume `length()` always returns a fresh variable, so # we probably don't need to duplicate there, but it seems better to be # extremely safe here. env_current_group_info <- private[["env_current_group_info"]] env_current_group_info[["dplyr:::current_group_id"]] <- duplicate(group) env_current_group_info[["dplyr:::current_group_size"]] <- duplicate(length(private$rows[[group]])) }, get_used = function() { .Call(env_resolved, private$chops, names(private$current_data)) }, unused_vars = function() { used <- self$get_used() current_vars <- self$current_vars() current_vars[!used] }, get_rows = function() { private$rows }, get_current_data = function(groups = TRUE) { out <- private$current_data if (!groups) { out <- out[self$current_non_group_vars()] } out }, forget = function() { names_bindings <- self$current_vars() verb <- private$verb osbolete_promise_fn <- function(name) { abort(c( "Obsolete data mask.", x = glue("Too late to resolve `{name}` after the end of `dplyr::{verb}()`."), i = glue("Did you save an object that uses `{name}` lazily in a column in the `dplyr::{verb}()` expression ?") ), call = NULL) } promises <- map(names_bindings, function(.x) expr(osbolete_promise_fn(!!.x))) env_mask_bindings <- private$env_mask_bindings suppressWarnings({ rm(list = names_bindings, envir = env_mask_bindings) env_bind_lazy(env_mask_bindings, !!!set_names(promises, names_bindings)) }) }, is_grouped = function() { 
private$grouped }, is_rowwise = function() { private$rowwise }, get_keys = function() { private$keys }, get_size = function() { private$size }, get_rlang_mask = function() { # Mimicking the data mask that is created during typical # expression evaluations, like in `DataMask$eval_all_mutate()`. # Important to insert a `.data` pronoun! mask <- new_data_mask(private$env_mask_bindings) mask[[".data"]] <- as_data_pronoun(private$env_mask_bindings) mask } ), private = list( # environment that contains lazy vec_chop()s for each input column # and list of result chunks as they get added. chops = NULL, # Environment which contains the: # - Current group id # - Current group size # Both of which are updated by reference at the C level. # This environment is the parent environment of `chops`. env_current_group_info = NULL, # Environment with active bindings for each column. # Expressions are evaluated in a fresh data mask created from this # environment. Each group gets its own newly created data mask to avoid # cross group contamination of the data mask by lexical side effects, like # usage of `<-` (#6666). env_mask_bindings = NULL, # ptypes of all the variables current_data = list(), # names of the `by` variables by_names = character(), # list of indices, one integer vector per group rows = NULL, # data frame of keys, one row per group keys = NULL, # number of rows in `data` size = NULL, # Type of data frame grouped = NULL, rowwise = NULL, verb = character() ) ) dplyr/R/rowwise.R0000644000176200001440000001176214366556340013452 0ustar liggesusers#' Group input by rows #' #' @description #' `rowwise()` allows you to compute on a data frame a row-at-a-time. #' This is most useful when a vectorised function doesn't exist. #' #' Most dplyr verbs preserve row-wise grouping. The exception is [summarise()], #' which return a [grouped_df]. You can explicitly ungroup with [ungroup()] #' or [as_tibble()], or convert to a [grouped_df] with [group_by()]. 
#'
#' @section List-columns:
#' Because a rowwise data frame has exactly one row per group it offers a
#' small convenience for working with list-columns. Normally, `summarise()`
#' and `mutate()` extract a group's worth of data with `[`. But when you index
#' a list in this way, you get back another list. When you're working with
#' a `rowwise` tibble, then dplyr will use `[[` instead of `[` to make your
#' life a little easier.
#'
#' @param data Input data frame.
#' @param ... <[`tidy-select`][dplyr_tidy_select]> Variables to be preserved
#'   when calling [summarise()]. This is typically a set of variables whose
#'   combination uniquely identifies each row.
#'
#'   **NB**: unlike `group_by()` you cannot create new variables here but
#'   instead you can select multiple variables with (e.g.) `everything()`.
#' @seealso [nest_by()] for a convenient way of creating rowwise data frames
#'   with nested data.
#' @return A row-wise data frame with class `rowwise_df`. Note that a
#'   `rowwise_df` is implicitly grouped by row, but is not a `grouped_df`.
#' @export
#' @examples
#' df <- tibble(x = runif(6), y = runif(6), z = runif(6))
#' # Compute the mean of x, y, z in each row
#' df %>% rowwise() %>% mutate(m = mean(c(x, y, z)))
#' # use c_across() to more easily select many variables
#' df %>% rowwise() %>% mutate(m = mean(c_across(x:z)))
#'
#' # Compute the minimum of x, y, and z in each row
#' df %>% rowwise() %>% mutate(m = min(c(x, y, z)))
#' # In this case you can use an existing vectorised function:
#' df %>% mutate(m = pmin(x, y, z))
#' # Where these functions exist they'll be much faster than rowwise
#' # so be on the lookout for them.
#' #' # rowwise() is also useful when doing simulations #' params <- tribble( #' ~sim, ~n, ~mean, ~sd, #' 1, 1, 1, 1, #' 2, 2, 2, 4, #' 3, 3, -1, 2 #' ) #' # Here I supply variables to preserve after the computation #' params %>% #' rowwise(sim) %>% #' reframe(z = rnorm(n, mean, sd)) #' #' # If you want one row per simulation, put the results in a list() #' params %>% #' rowwise(sim) %>% #' summarise(z = list(rnorm(n, mean, sd)), .groups = "keep") rowwise <- function(data, ...) { UseMethod("rowwise") } #' @export rowwise.data.frame <- function(data, ...) { vars <- tidyselect::eval_select(expr(c(...)), data) rowwise_df(data, vars) } #' @export rowwise.grouped_df <- function(data, ...) { if (!missing(...)) { bullets <- c( "Can't re-group when creating rowwise data.", i = "Either first `ungroup()` or call `rowwise()` without arguments." ) abort(bullets) } rowwise_df(data, group_vars(data)) } # Constructor + helper ---------------------------------------------------- rowwise_df <- function(data, group_vars) { group_data <- as_tibble(data)[group_vars] new_rowwise_df(data, group_data) } is_rowwise_df <- function(x) { inherits(x, "rowwise_df") } #' @rdname new_grouped_df #' @export new_rowwise_df <- function(data, group_data = NULL, ..., class = character()) { nrow <- nrow(data) if (!is.null(group_data)) { if (!is_tibble(group_data) || has_name(group_data, ".rows")) { msg <- "`group_data` must be a tibble without a `.rows` column." 
abort(msg) } group_data <- new_tibble(vec_data(group_data), nrow = nrow) # strip attributes } else { group_data <- new_tibble(list(), nrow = nrow) } group_data$.rows <- new_list_of(as.list(seq_len(nrow)), ptype = integer()) new_tibble( data, groups = group_data, ..., nrow = nrow, class = c(class, "rowwise_df") ) } #' @rdname new_grouped_df #' @export validate_rowwise_df <- function(x) { result <- .Call(`dplyr_validate_rowwise_df`, x) if (!is.null(result)) { abort(result) } x } setOldClass(c("rowwise_df", "tbl_df", "tbl", "data.frame")) # methods ----------------------------------------------------------------- #' @importFrom pillar tbl_sum #' @export tbl_sum.rowwise_df <- function(x, ...) { c( NextMethod(), "Rowwise" = commas(group_vars(x)) ) } #' @export as_tibble.rowwise_df <- function(x, ...) { new_tibble(vec_data(x), nrow = nrow(x)) } #' @export `[.rowwise_df` <- function(x, i, j, drop = FALSE) { out <- NextMethod() if (!is.data.frame(out)) { return(out) } group_vars <- intersect(names(out), group_vars(x)) rowwise_df(out, group_vars) } #' @export `[<-.rowwise_df` <- function(x, i, j, ..., value) { out <- NextMethod() group_vars <- intersect(names(out), group_vars(x)) rowwise_df(out, group_vars) } #' @export `names<-.rowwise_df` <- function(x, value) { data <- NextMethod() group_vars <- value[match(group_vars(x), names(x))] rowwise_df(data, group_vars) } dplyr/R/all-equal.R0000644000176200001440000000670014406402754013616 0ustar liggesusers#' Flexible equality comparison for data frames #' #' @description #' `r lifecycle::badge("deprecated")` #' #' `all_equal()` allows you to compare data frames, optionally ignoring #' row and column names. It is deprecated as of dplyr 1.1.0, because it #' makes it too easy to ignore important differences. #' #' @param target,current Two data frames to compare. #' @param ignore_col_order Should order of columns be ignored? #' @param ignore_row_order Should order of rows be ignored? 
#' @param convert Should similar classes be converted? Currently this will #' convert factor to character and integer to double. #' @param ... Ignored. Needed for compatibility with `all.equal()`. #' @return `TRUE` if equal, otherwise a character vector describing #' the reasons why they're not equal. Use [isTRUE()] if using the #' result in an `if` expression. #' @export #' @keywords internal #' @examples #' scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))] #' #' # `all_equal()` ignored row and column ordering by default, #' # but we now feel that that makes it too easy to make mistakes #' mtcars2 <- scramble(mtcars) #' all_equal(mtcars, mtcars2) #' #' # Instead, be explicit about the row and column ordering #' all.equal( #' mtcars, #' mtcars2[rownames(mtcars), names(mtcars)] #' ) all_equal <- function(target, current, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE, ...) { lifecycle::deprecate_warn("1.1.0", "all_equal()", "all.equal()", details = "And manually order the rows/cols as needed" ) equal_data_frame(target, current, ignore_col_order = ignore_col_order, ignore_row_order = ignore_row_order, convert = convert ) } equal_data_frame <- function(x, y, ignore_col_order = TRUE, ignore_row_order = TRUE, convert = FALSE) { compat <- is_compatible(x, y, ignore_col_order = ignore_col_order, convert = convert) if (!isTRUE(compat)) { # revert the bulleting from is_compatible() return(glue_collapse(compat, sep = "\n")) } nrows_x <- nrow(x) nrows_y <- nrow(y) if (nrows_x != nrows_y) { return("Different number of rows.") } if (ncol(x) == 0L) { return(TRUE) } # suppressMessages({ x <- as_tibble(x, .name_repair = "universal") y <- as_tibble(y, .name_repair = "universal") # }) x_split <- dplyr_locate_sorted_groups(x) y_split <- dplyr_locate_sorted_groups(y[, names(x), drop = FALSE]) # keys must be identical msg <- "" if (any(wrong <- !vec_in(x_split$key, y_split$key))) { rows <- sort(map_int(x_split$loc[which(wrong)], function(.x) .x[1L])) msg <- 
paste0(msg, "- Rows in x but not in y: ", glue_collapse(rows, sep = ", "), "\n") } if (any(wrong <- !vec_in(y_split$key, x_split$key))) { rows <- sort(map_int(y_split$loc[which(wrong)], function(.x) .x[1L])) msg <- paste0(msg, "- Rows in y but not in x: ", glue_collapse(rows, sep = ", "), "\n") } if (msg != "") { return(msg) } # keys are identical, check that rows occur the same number of times if (any(wrong <- lengths(x_split$loc) != lengths(y_split$loc))) { rows <- sort(map_int(x_split$loc[which(wrong)], function(.x) .x[1L])) return(paste0("- Rows with difference occurrences in x and y: ", glue_collapse(rows, sep = ", "), "\n" )) } # then if we care about row order, the id need to be identical if (!ignore_row_order && !all(vec_equal(x_split$loc, y_split$loc))) { return("Same row values, but different order") } TRUE } dplyr/R/tbl.R0000644000176200001440000000422614472225345012525 0ustar liggesusers#' Create a table from a data source #' #' This is a generic method that dispatches based on the first argument. #' #' @param src A data source #' @param ... Other arguments passed on to the individual methods #' @export tbl <- function(src, ...) { UseMethod("tbl") } #' Create a "tbl" object #' #' `tbl()` is the standard constructor for tbls. `as.tbl()` coerces, #' and `is.tbl()` tests. #' #' @keywords internal #' @export #' @param subclass name of subclass. "tbl" is an abstract base class, so you #' must supply this value. `tbl_` is automatically prepended to the #' class name #' @param ... For `tbl()`, other fields used by class. For `as.tbl()`, #' other arguments passed to methods. make_tbl <- function(subclass, ...) 
{ subclass <- paste0("tbl_", subclass) structure(list(...), class = c(subclass, "tbl")) } #' @rdname tbl #' @param x Any object #' @export is.tbl <- function(x) inherits(x, "tbl") tbl_vars_dispatch <- function(x) { UseMethod("tbl_vars") } new_sel_vars <- function(vars, group_vars) { structure( vars, groups = group_vars, class = c("dplyr_sel_vars", "character") ) } #' List variables provided by a tbl. #' #' `tbl_vars()` returns all variables while `tbl_nongroup_vars()` #' returns only non-grouping variables. The `groups` attribute #' of the object returned by `tbl_vars()` is a character vector of the #' grouping columns. #' #' @export #' @param x A tbl object #' @seealso [group_vars()] for a function that returns grouping #' variables. #' @keywords internal tbl_vars <- function(x) { return(new_sel_vars(tbl_vars_dispatch(x), group_vars(x))) # For roxygen and static analysis UseMethod("tbl_vars") } #' @export tbl_vars.data.frame <- function(x) { names(x) } #' @rdname tbl_vars #' @export tbl_nongroup_vars <- function(x) { setdiff(tbl_vars(x), group_vars(x)) } is_sel_vars <- function(x) { inherits(x, "dplyr_sel_vars") } #' @export print.dplyr_sel_vars <- function(x, ...) { cat("\n") print(unstructure(x)) groups <- attr(x, "groups") if (length(groups)) { cat("Groups:\n") print(groups) } invisible(x) } unstructure <- function(x) { attributes(x) <- NULL x } dplyr/R/reexport-magrittr.R0000644000176200001440000000006714366556340015446 0ustar liggesusers#' @importFrom magrittr %>% #' @export magrittr::`%>%` dplyr/R/colwise-select.R0000644000176200001440000001122414266276767014701 0ustar liggesusers#' Select and rename a selection of variables #' #' @description #' `r lifecycle::badge("superseded")` #' #' `rename_if()`, `rename_at()`, and `rename_all()` have been superseded by #' `rename_with()`. The matching select statements have been superseded by the #' combination of a `select()` + `rename_with()`. 
Any predicate functions passed #' as arguments to `select()` or `rename_with()` must be wrapped in [where()]. #' #' These functions were superseded because `mutate_if()` and friends were #' superseded by `across()`. `select_if()` and `rename_if()` already use tidy #' selection so they can't be replaced by `across()` and instead we need a new #' function. #' #' @inheritParams scoped #' @keywords internal #' @param .funs A function `fun`, a purrr style lambda `~ fun(.)` or a list of either form. #' @examples #' mtcars <- as_tibble(mtcars) # for nicer printing #' #' mtcars %>% rename_all(toupper) #' # -> #' mtcars %>% rename_with(toupper) #' #' # NB: the transformation comes first in rename_with #' is_whole <- function(x) all(floor(x) == x) #' mtcars %>% rename_if(is_whole, toupper) #' # -> #' mtcars %>% rename_with(toupper, where(is_whole)) #' #' mtcars %>% rename_at(vars(mpg:hp), toupper) #' # -> #' mtcars %>% rename_with(toupper, mpg:hp) #' #' # You now must select() and then rename #' #' mtcars %>% select_all(toupper) #' # -> #' mtcars %>% rename_with(toupper) #' #' # Selection drops unselected variables: #' mtcars %>% select_if(is_whole, toupper) #' # -> #' mtcars %>% select(where(is_whole)) %>% rename_with(toupper) #' #' mtcars %>% select_at(vars(-contains("ar"), starts_with("c")), toupper) #' # -> #' mtcars %>% #' select(!contains("ar") | starts_with("c")) %>% #' rename_with(toupper) #' @export select_all <- function(.tbl, .funs = list(), ...) { lifecycle::signal_stage("superseded", "select_all()") funs <- as_fun_list(.funs, caller_env(), ..., .caller = "select_all") vars <- tbl_vars(.tbl) syms <- vars_select_syms(vars, funs, .tbl) select(.tbl, !!!syms) } #' @rdname select_all #' @export rename_all <- function(.tbl, .funs = list(), ...) 
{ lifecycle::signal_stage("superseded", "rename_with()") funs <- as_fun_list(.funs, caller_env(), ..., .caller = "rename_all") vars <- tbl_vars(.tbl) syms <- vars_select_syms(vars, funs, .tbl, strict = TRUE) rename(.tbl, !!!syms) } #' @rdname select_all #' @export select_if <- function(.tbl, .predicate, .funs = list(), ...) { funs <- as_fun_list(.funs, caller_env(), ..., .caller = "select_if") if (!is_logical(.predicate)) { .predicate <- as_fun_list(.predicate, caller_env(), .caller = "select_if", .caller_arg = ".predicate") } vars <- tbl_if_vars(.tbl, .predicate, caller_env(), .include_group_vars = TRUE) syms <- vars_select_syms(vars, funs, .tbl) select(.tbl, !!!syms) } #' @rdname select_all #' @export rename_if <- function(.tbl, .predicate, .funs = list(), ...) { funs <- as_fun_list(.funs, caller_env(), ..., .caller = "rename_if") if (!is_logical(.predicate)) { .predicate <- as_fun_list(.predicate, caller_env(), .caller = "rename_if", .caller_arg = ".predicate") } vars <- tbl_if_vars(.tbl, .predicate, caller_env(), .include_group_vars = TRUE) syms <- vars_select_syms(vars, funs, .tbl, strict = TRUE) rename(.tbl, !!!syms) } #' @rdname select_all #' @export select_at <- function(.tbl, .vars, .funs = list(), ...) { vars <- tbl_at_vars(.tbl, .vars, .include_group_vars = TRUE) funs <- as_fun_list(.funs, caller_env(), ..., .caller = "select_at") syms <- vars_select_syms(vars, funs, .tbl) select(.tbl, !!!syms) } #' @rdname select_all #' @export rename_at <- function(.tbl, .vars, .funs = list(), ...) 
{ vars <- tbl_at_vars(.tbl, .vars, .include_group_vars = TRUE) funs <- as_fun_list(.funs, caller_env(), ..., .caller = "rename_at") syms <- vars_select_syms(vars, funs, .tbl, strict = TRUE) rename(.tbl, !!!syms) } vars_select_syms <- function(vars, funs, tbl, strict = FALSE, error_call = caller_env()) { if (length(funs) > 1) { msg <- glue("`.funs` must contain one renaming function, not {length(funs)}.") abort(msg, call = error_call) } else if (length(funs) == 1) { fun <- funs[[1]] if (is_quosure(fun)) { fun <- quo_as_function(fun) } syms <- if (length(vars)) { set_names(syms(vars), fun(as.character(vars))) } else { set_names(syms(vars)) } } else if (!strict) { syms <- syms(vars) } else { msg <- glue("`.funs` must specify a renaming function.") abort(msg, call = error_call) } group_vars <- group_vars(tbl) group_syms <- syms(group_vars) has_group_sym <- group_syms %in% syms new_group_syms <- set_names(group_syms[!has_group_sym], group_vars[!has_group_sym]) c(new_group_syms, syms) } dplyr/R/count-tally.R0000644000176200001440000001527214420040360014203 0ustar liggesusers#' Count the observations in each group #' #' @description #' `count()` lets you quickly count the unique values of one or more variables: #' `df %>% count(a, b)` is roughly equivalent to #' `df %>% group_by(a, b) %>% summarise(n = n())`. #' `count()` is paired with `tally()`, a lower-level helper that is equivalent #' to `df %>% summarise(n = n())`. Supply `wt` to perform weighted counts, #' switching the summary from `n = n()` to `n = sum(wt)`. #' #' `add_count()` and `add_tally()` are equivalents to `count()` and `tally()` #' but use `mutate()` instead of `summarise()` so that they add a new column #' with group-wise counts. #' #' @param x A data frame, data frame extension (e.g. a tibble), or a #' lazy data frame (e.g. from dbplyr or dtplyr). #' @param ... <[`data-masking`][rlang::args_data_masking]> Variables to group #' by. 
#' @param wt <[`data-masking`][rlang::args_data_masking]> Frequency weights. #' Can be `NULL` or a variable: #' #' * If `NULL` (the default), counts the number of rows in each group. #' * If a variable, computes `sum(wt)` for each group. #' @param sort If `TRUE`, will show the largest groups at the top. #' @param name The name of the new column in the output. #' #' If omitted, it will default to `n`. If there's already a column called `n`, #' it will use `nn`. If there's a column called `n` and `nn`, it'll use #' `nnn`, and so on, adding `n`s until it gets a new name. #' @param .drop Handling of factor levels that don't appear in the data, passed #' on to [group_by()]. #' #' For `count()`: if `FALSE` will include counts for empty groups (i.e. for #' levels of factors that don't exist in the data). #' #' `r lifecycle::badge("deprecated")` For `add_count()`: deprecated since it #' can't actually affect the output. #' @return #' An object of the same type as `.data`. `count()` and `add_count()` #' group transiently, so the output has the same groups as the input. #' @export #' @examples #' # count() is a convenient way to get a sense of the distribution of #' # values in a dataset #' starwars %>% count(species) #' starwars %>% count(species, sort = TRUE) #' starwars %>% count(sex, gender, sort = TRUE) #' starwars %>% count(birth_decade = round(birth_year, -1)) #' #' # use the `wt` argument to perform a weighted count. 
This is useful #' # when the data has already been aggregated once #' df <- tribble( #' ~name, ~gender, ~runs, #' "Max", "male", 10, #' "Sandra", "female", 1, #' "Susan", "female", 4 #' ) #' # counts rows: #' df %>% count(gender) #' # counts runs: #' df %>% count(gender, wt = runs) #' #' # When factors are involved, `.drop = FALSE` can be used to retain factor #' # levels that don't appear in the data #' df2 <- tibble( #' id = 1:5, #' type = factor(c("a", "c", "a", NA, "a"), levels = c("a", "b", "c")) #' ) #' df2 %>% count(type) #' df2 %>% count(type, .drop = FALSE) #' #' # Or, using `group_by()`: #' df2 %>% group_by(type, .drop = FALSE) %>% count() #' #' # tally() is a lower-level function that assumes you've done the grouping #' starwars %>% tally() #' starwars %>% group_by(species) %>% tally() #' #' # both count() and tally() have add_ variants that work like #' # mutate() instead of summarise #' df %>% add_count(gender, wt = runs) #' df %>% add_tally(wt = runs) count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { UseMethod("count") } #' @export #' @rdname count count.data.frame <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x)) { dplyr_local_error_call() if (!missing(...)) { out <- group_by(x, ..., .add = TRUE, .drop = .drop) } else { out <- x } out <- tally(out, wt = !!enquo(wt), sort = sort, name = name) # Ensure grouping is transient out <- dplyr_reconstruct(out, x) out } #' @export #' @rdname count tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { UseMethod("tally") } #' @export tally.data.frame <- function(x, wt = NULL, sort = FALSE, name = NULL) { name <- check_n_name(name, group_vars(x)) dplyr_local_error_call() n <- tally_n(x, {{ wt }}) local_options(dplyr.summarise.inform = FALSE) out <- summarise(x, !!name := !!n) if (sort) { arrange(out, desc(!!sym(name))) } else { out } } #' @export #' @rdname count add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = 
deprecated()) { UseMethod("add_count") } #' @export add_count.default <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) { add_count_impl( x, ..., wt = {{ wt }}, sort = sort, name = name, .drop = .drop ) } #' @export add_count.data.frame <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) { out <- add_count_impl( x, ..., wt = {{ wt }}, sort = sort, name = name, .drop = .drop ) dplyr_reconstruct(out, x) } add_count_impl <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated(), error_call = caller_env()) { if (!is_missing(.drop)) { lifecycle::deprecate_warn("1.0.0", "add_count(.drop = )", always = TRUE) } dplyr_local_error_call(error_call) if (!missing(...)) { out <- group_by(x, ..., .add = TRUE) } else { out <- x } add_tally(out, wt = {{ wt }}, sort = sort, name = name) } #' @rdname count #' @export add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) { name <- check_n_name(name, tbl_vars(x)) dplyr_local_error_call() n <- tally_n(x, {{ wt }}) out <- mutate(x, !!name := !!n) if (sort) { arrange(out, desc(!!sym(name))) } else { out } } # Helpers ----------------------------------------------------------------- tally_n <- function(x, wt) { wt <- enquo(wt) if (is_call(quo_get_expr(wt), "n", n = 0)) { # Provided only by dplyr 1.0.0. See #5349 for discussion. warn(c( "`wt = n()` is deprecated", i = "You can now omit the `wt` argument" )) wt <- quo(NULL) } if (quo_is_null(wt)) { expr(n()) } else { expr(sum(!!wt, na.rm = TRUE)) } } check_n_name <- function(name, vars, arg = caller_arg(name), call = caller_env()) { if (is.null(name)) { name <- n_name(vars) if (name != "n") { inform(c( glue("Storing counts in `{name}`, as `n` already present in input"), i = "Use `name = \"new_name\"` to pick a new name." 
)) } } else { check_string(name, arg = arg, call = call) } name } n_name <- function(x) { name <- "n" while (name %in% x) { name <- paste0("n", name) } name } dplyr/R/conditions.R0000644000176200001440000002403414406402754014112 0ustar liggesusers#' Local error call for dplyr verbs #' @noRd dplyr_local_error_call <- function(call = frame, frame = caller_env()) { # This doesn't implement the semantics of a `local_` function # perfectly in order to be as fast as possible frame$.__dplyr_error_call__. <- call invisible(NULL) } # Takes the local call by default. If the caller of the verb has # called `dplyr_local_error_call()`, we used that call instead. This # logic is slightly different than in checking functions or error # helpers, where the error call is always taken from the parent by # default. dplyr_error_call <- function(call) { if (is_missing(call)) { call <- caller_env() } while (is_environment(call)) { caller <- eval_bare(quote(base::parent.frame()), call) caller_call <- caller[[".__dplyr_error_call__."]] if (is_null(caller_call)) { break } call <- caller_call } call } cnd_bullet_cur_group_label <- function(what = "error") { label <- cur_group_label() if (label != "") { glue("In {label}.") } } cnd_bullet_rowwise_unlist <- function() { if (peek_mask()$is_rowwise()) { glue_data(peek_error_context(), "Did you mean: `{error_name} = list({quo_as_label(error_quo)})` ?") } } or_1 <- function(x) { if(x == 1L) { "1" } else { glue("{x} or 1") } } has_active_group_context <- function(mask) { mask$get_current_group_id_mutable() != 0L } # Common ------------------------------------------------------------------ is_data_pronoun <- function(x) { is_call(x, c("[[", "$")) && identical(x[[2]], sym(".data")) } # Because as_label() strips off .data$<> and .data[[<>]] quo_as_label <- function(quo) { expr <- quo_get_expr(quo) if (is_data_pronoun(expr)) { deparse(expr)[[1]] } else{ with_no_rlang_infix_labeling(as_label(expr)) } } local_error_context <- function(dots, i, mask, frame 
= caller_env()) { ctxt <- new_error_context(dots, i, mask = mask) context_local("dplyr_error_context", ctxt, frame = frame) } peek_error_context <- function() { context_peek("dplyr_error_context", "dplyr error handling") } new_error_context <- function(dots, i, mask) { if (!length(dots) || i == 0L) { env( error_name = "", error_quo = NULL, mask = mask ) } else { # Saving the quosure rather than the result of `quo_as_label()` to avoid # slow label creation unless required env( error_name = names(dots)[[i]], error_quo = dots[[i]], mask = mask ) } } # Doesn't restore values. To be called within a # `local_error_context()` in charge of restoring. poke_error_context <- function(dots, i, mask) { ctxt <- new_error_context(dots, i, mask = mask) context_poke("dplyr_error_context", ctxt) } mask_type <- function(mask = peek_mask()) { if (mask$get_size() > 0) { if (mask$is_grouped()) { return("grouped") } else if (mask$is_rowwise()) { return("rowwise") } } "ungrouped" } ctxt_error_label <- function(ctxt = peek_error_context()) { error_label(ctxt$error_name, ctxt$error_quo) } error_label <- function(name, quo) { if (is_null(name) || !nzchar(name)) { quo_as_label(quo) } else { name } } ctxt_error_label_named <- function(ctxt = peek_error_context()) { error_label_named(ctxt$error_name, ctxt$error_quo) } error_label_named <- function(name, quo) { if (is_null(name) || !nzchar(name)) { quo_as_label(quo) } else { paste0(name, " = ", quo_as_label(quo)) } } cnd_bullet_header <- function(what) { ctxt <- peek_error_context() label <- ctxt_error_label_named(ctxt) if (is_string(what, "recycle")) { glue("Can't {what} `{label}`.") } else { c("i" = glue("In argument: `{label}`.")) } } cnd_bullet_combine_details <- function(x, arg) { id <- as.integer(sub("^..", "", arg)) group <- peek_mask()$get_keys()[id, ] details <- cur_group_label(id = group, group = group) glue("Result of type <{vec_ptype_full(x)}> for {details}.") } err_vars <- function(x) { if (is.logical(x)) { x <- which(x) } if 
(is.character(x)) { x <- encodeString(x, quote = "`") } glue_collapse(x, sep = ", ", last = if (length(x) <= 2) " and " else ", and ") } err_locs <- function(x) { if (!is.integer(x)) { abort("`x` must be an integer vector of locations.", .internal = TRUE) } size <- length(x) if (size == 0L) { abort("`x` must have at least 1 location.", .internal = TRUE) } if (size > 5L) { x <- x[1:5] extra <- glue(" and {size - 5L} more") } else { extra <- "" } x <- glue_collapse(x, sep = ", ") glue("`c({x})`{extra}") } dplyr_internal_error <- function(class = NULL, data = list()) { abort(class = c(class, "dplyr:::internal_error"), dplyr_error_data = data) } dplyr_internal_signal <- function(class) { signal(message = "Internal dplyr signal", class = c(class, "dplyr:::internal_signal")) } skip_internal_condition <- function(cnd) { if (inherits(cnd, "dplyr:::internal_error")) { cnd$parent } else { cnd } } dplyr_error_handler <- function(dots, mask, bullets, error_call, action = "compute", error_class = NULL, i_sym = "i", frame = caller_env()) { force(frame) function(cnd) { local_error_context(dots, i = frame[[i_sym]], mask = mask) if (inherits(cnd, "dplyr:::internal_error")) { parent <- error_cnd(message = bullets(cnd)) } else { parent <- cnd } # FIXME: Must be after calling `bullets()` because the # `dplyr:::summarise_incompatible_size` method sets the correct # group by side effect message <- c( cnd_bullet_header(action), "i" = if (has_active_group_context(mask)) cnd_bullet_cur_group_label() ) abort( message, class = error_class, parent = parent, call = error_call ) } } # Warnings ------------------------------------------------------------- #' Show warnings from the last command #' #' Warnings that occur inside a dplyr verb like `mutate()` are caught #' and stashed away instead of being emitted to the console. This #' prevents rowwise and grouped data frames from flooding the console #' with warnings. To see the original warnings, use #' `last_dplyr_warnings()`. 
#'
#' @param n Passed to [head()] so that only the first `n` warnings are
#'   displayed.
#' @keywords internal
#' @export
last_dplyr_warnings <- function(n = 5) {
  # Retrieve up to `n` stashed warnings from the last top-level command and
  # wrap them in a classed list so `print.last_dplyr_warnings()` can render
  # them (with backtraces).
  if (!identical(n, Inf)) {
    check_number_whole(n)
    stopifnot(n >= 0)
  }
  warnings <- the$last_warnings
  n_remaining <- max(length(warnings) - n, 0L)

  warnings <- head(warnings, n = n)
  warnings <- map(warnings, new_dplyr_warning)

  structure(
    warnings,
    class = c("last_dplyr_warnings", "list"),
    n_shown = n,
    n_remaining = n_remaining
  )
}

on_load({
  # Package-local state: the warning stash and the address of the top-level
  # frame it belongs to (used by `push_dplyr_warnings()` to detect a new
  # command and flush the stash).
  the$last_warnings <- list()
  the$last_cmd_frame <- ""
})

# Returns a calling handler that records each warning (plus dplyr error
# context) into `state$warnings` and muffles it so it never reaches the
# console directly.
dplyr_warning_handler <- function(state, mask, error_call) {
  # `error_call()` does some non-trivial work, e.g. climbing frame
  # environments to find generic calls. We avoid evaluating it
  # repeatedly in the loop by assigning it here (lazily as we only
  # need it for the error path).
  delayedAssign("error_call_forced", error_call(error_call))

  function(cnd) {
    # Don't entrace more than 5 warnings because this is very costly
    if (is_null(cnd$trace) && length(state$warnings) < 5) {
      cnd$trace <- trace_back(bottom = error_call)
    }
    new <- cnd_data(
      cnd = cnd,
      ctxt = peek_error_context(),
      mask = mask,
      call = error_call_forced
    )
    state$warnings <- c(state$warnings, list(new))
    maybe_restart("muffleWarning")
  }
}

# Flushes warnings if a new top-level command is detected
push_dplyr_warnings <- function(warnings) {
  last <- the$last_cmd_frame
  current <- obj_address(sys.frame(1))

  if (!identical(last, current)) {
    reset_dplyr_warnings()
    the$last_cmd_frame <- current
  }

  the$last_warnings <- c(the$last_warnings, warnings)
}

# Also used in tests
reset_dplyr_warnings <- function() {
  the$last_warnings <- list()
}

# Emits a single summary warning covering everything collected by
# `dplyr_warning_handler()`, pointing the user at `last_dplyr_warnings()`
# for the rest.
signal_warnings <- function(state, error_call) {
  warnings <- state$warnings
  n <- length(warnings)
  if (!n) {
    return()
  }

  push_dplyr_warnings(warnings)

  first <- new_dplyr_warning(warnings[[1]])
  call <- format_error_call(error_call)

  # Only prepend a yellow "!" when the first header bullet is unnamed;
  # named bullets already carry their own prefix.
  if (nzchar(names2(cnd_header(first))[[1]])) {
    prefix <- NULL
  } else {
    prefix <- paste0(cli::col_yellow("!"), " ")
  }

  msg <- paste_line(
    cli::format_warning(c(
      "There {cli::qty(n)} {?was/were} {n} warning{?s} in {call}.",
      if (n > 1) "The first warning was:"
    )),
    paste0(prefix, cnd_message(first)),
    if (n > 1) cli::format_warning(c(
      i = "Run {.run dplyr::last_dplyr_warnings()} to see the {n - 1} remaining warning{?s}."
    ))
  )
  warn(msg, use_cli_format = FALSE)
}

# Rehydrates a stored warning record into a warning condition that names the
# offending argument and, when group data was captured, the group it
# occurred in.
new_dplyr_warning <- function(data) {
  if (data$has_group_data) {
    group_label <- cur_group_label(
      data$type,
      data$group_data$id,
      data$group_data$group
    )
  } else {
    group_label <- ""
  }

  label <- error_label_named(data$name, data$quo)

  msg <- c(
    "i" = glue::glue("In argument: `{label}`."),
    "i" = if (nzchar(group_label)) glue("In {group_label}.")
  )

  warning_cnd(
    message = msg,
    parent = data$cnd,
    call = data$call,
    trace = data$cnd$trace
  )
}

#' @export
print.last_dplyr_warnings <- function(x, ...) {
  # Opt into experimental grayed out tree
  local_options(
    "rlang:::trace_display_tree" = TRUE
  )

  print(unstructure(x), ..., simplify = "none")

  n_remaining <- attr(x, "n_remaining")
  if (n_remaining) {
    # Suggest doubling the number shown on the next call.
    n_more <- attr(x, "n_shown") * 2
    cli::cli_bullets(c(
      "... with {n_remaining} more warning{?s}.",
      "i" = "Run {.run dplyr::last_dplyr_warnings(n = {n_more})} to show more."
    ))
  }
}

# rlang should export this routine
# Normalises `call` the same way `abort()` would, by round-tripping it
# through a caught error condition and extracting its call.
error_call <- function(call) {
  tryCatch(
    abort("", call = call),
    error = conditionCall
  )
}

# Assembles the standard header/body/footer lines of a condition message.
cnd_message_lines <- function(cnd, ...) {
  c(
    "!" = cnd_header(cnd, ...),
    cnd_body(cnd, ...),
    cnd_footer(cnd, ...)
  )
}
dplyr/R/lead-lag.R0000644000176200001440000000731414406402754013411 0ustar liggesusers
#' Compute lagged or leading values
#'
#' Find the "previous" (`lag()`) or "next" (`lead()`) values in a vector. Useful
#' for comparing values behind of or ahead of the current values.
#'
#' @param x A vector
#' @param n Positive integer of length 1, giving the number of positions to
#'   lag or lead by
#' @param default The value used to pad `x` back to its original size after the
#'   lag or lead has been applied.
#' The default, `NULL`, pads with a missing
#' value. If supplied, this must be a vector with size 1, which will be cast
#' to the type of `x`.
#' @param order_by An optional secondary vector that defines the ordering to use
#'   when applying the lag or lead to `x`. If supplied, this must be the same
#'   size as `x`.
#' @param ... Not used.
#'
#' @return
#' A vector with the same type and size as `x`.
#'
#' @name lead-lag
#' @examples
#' lag(1:5)
#' lead(1:5)
#'
#' x <- 1:5
#' tibble(behind = lag(x), x, ahead = lead(x))
#'
#' # If you want to look more rows behind or ahead, use `n`
#' lag(1:5, n = 1)
#' lag(1:5, n = 2)
#'
#' lead(1:5, n = 1)
#' lead(1:5, n = 2)
#'
#' # If you want to define a value to pad with, use `default`
#' lag(1:5)
#' lag(1:5, default = 0)
#'
#' lead(1:5)
#' lead(1:5, default = 6)
#'
#' # If the data are not already ordered, use `order_by`
#' scrambled <- slice_sample(
#'   tibble(year = 2000:2005, value = (0:5) ^ 2),
#'   prop = 1
#' )
#'
#' wrong <- mutate(scrambled, previous_year_value = lag(value))
#' arrange(wrong, year)
#'
#' right <- mutate(scrambled, previous_year_value = lag(value, order_by = year))
#' arrange(right, year)
NULL

#' @export
#' @rdname lead-lag
lag <- function(x, n = 1L, default = NULL, order_by = NULL, ...) {
  # `stats::lag()` has different semantics for time series, so redirect users.
  # NOTE(review): the message text appears garbled by extraction (a class name
  # seems to be missing after "not a") -- confirm against upstream sources.
  if (inherits(x, "ts")) {
    abort("`x` must be a vector, not a , do you want `stats::lag()`?")
  }
  check_dots_empty0(...)
  check_number_whole(n)
  if (n < 0L) {
    abort("`n` must be positive.")
  }
  shift(x, n = n, default = default, order_by = order_by)
}

#' @export
#' @rdname lead-lag
lead <- function(x, n = 1L, default = NULL, order_by = NULL, ...) {
  check_dots_empty0(...)
  check_number_whole(n)
  if (n < 0L) {
    abort("`n` must be positive.")
  }
  # A lead is implemented as a negative shift.
  shift(x, n = -n, default = default, order_by = order_by)
}

# Shared engine for `lag()` and `lead()`: positive `n` shifts backwards (lag),
# negative `n` shifts forwards (lead). `default`, when supplied, must be size
# 1 and castable to the type of `x`.
shift <- function(x,
                  ...,
                  n = 1L,
                  default = NULL,
                  order_by = NULL,
                  error_call = caller_env()) {
  check_dots_empty0(...)
  if (!is.null(order_by)) {
    # Reorder by `order_by`, shift, then restore the original order.
    out <- with_order(
      order_by = order_by,
      fun = shift,
      x = x,
      n = n,
      default = default,
      error_call = error_call
    )
    return(out)
  }

  obj_check_vector(x, call = error_call)
  check_number_whole(n)
  n <- vec_cast(n, integer(), call = error_call)

  if (!is.null(default)) {
    vec_check_size(default, size = 1L, call = error_call)
    default <- vec_cast(
      x = default,
      to = x,
      x_arg = "default",
      to_arg = "x",
      call = error_call
    )
  }

  lag <- n >= 0L
  n <- abs(n)

  size <- vec_size(x)
  # Shifting by more than the length just pads the entire result.
  if (n > size) {
    n <- size
  }

  if (is.null(default)) {
    shift_slice(x, n, size, lag)
  } else {
    shift_c(x, n, size, lag, default)
  }
}

# Shift by slicing with `NA_integer_` locations: `vec_slice()` turns those
# into missing values, so no `default` is required.
shift_slice <- function(x, n, size, lag) {
  loc_default <- vec_rep(NA_integer_, n)

  if (lag) {
    loc <- seq2(1L, size - n)
    loc <- vec_c(loc_default, loc)
    vec_slice(x, loc)
  } else {
    loc <- seq2(1L + n, size)
    loc <- vec_c(loc, loc_default)
    vec_slice(x, loc)
  }
}

# Shift by concatenating a repeated `default` (already cast to the type of
# `x` by `shift()`) on the padded side.
shift_c <- function(x, n, size, lag, default) {
  default <- vec_rep(default, n)

  if (lag) {
    loc <- seq2(1L, size - n)
    x <- vec_slice(x, loc)
    vec_c(default, x, .ptype = x)
  } else {
    loc <- seq2(1L + n, size)
    x <- vec_slice(x, loc)
    vec_c(x, default, .ptype = x)
  }
}
dplyr/R/group-by.R0000644000176200001440000002302114366556340013506 0ustar liggesusers
#' Group by one or more variables
#'
#' @description
#' Most data operations are done on groups defined by variables.
#' `group_by()` takes an existing tbl and converts it into a grouped tbl
#' where operations are performed "by group". `ungroup()` removes grouping.
#'
#' @family grouping functions
#' @inheritParams arrange
#' @param ... In `group_by()`, variables or computations to group by.
#'   Computations are always done on the ungrouped data frame.
#'   To perform computations on the grouped data, you need to use
#'   a separate `mutate()` step before the `group_by()`.
#'   Computations are not allowed in `nest_by()`.
#'   In `ungroup()`, variables to remove from the grouping.
#' @param .add When `FALSE`, the default, `group_by()` will
#'   override existing groups.
#' To add to the existing groups, use
#' `.add = TRUE`.
#'
#' This argument was previously called `add`, but that prevented
#' creating a new grouping variable called `add`, and conflicts with
#' our naming conventions.
#' @param .drop Drop groups formed by factor levels that don't appear in the
#'   data? The default is `TRUE` except when `.data` has been previously
#'   grouped with `.drop = FALSE`. See [group_by_drop_default()] for details.
#' @return A grouped data frame with class [`grouped_df`][grouped_df],
#'   unless the combination of `...` and `.add` yields an empty set of
#'   grouping columns, in which case a tibble will be returned.
#' @section Methods:
#' These functions are **generic**s, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' Methods available in currently loaded packages:
#'
#' * `group_by()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}.
#' * `ungroup()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}.
#'
#' @section Ordering:
#' Currently, `group_by()` internally orders the groups in ascending order. This
#' results in ordered output from functions that aggregate groups, such as
#' [summarise()].
#'
#' When used as grouping columns, character vectors are ordered in the C locale
#' for performance and reproducibility across R sessions. If the resulting
#' ordering of your grouped operation matters and is dependent on the locale,
#' you should follow up the grouped operation with an explicit call to
#' [arrange()] and set the `.locale` argument. For example:
#'
#' ```
#' data %>%
#'   group_by(chr) %>%
#'   summarise(avg = mean(x)) %>%
#'   arrange(chr, .locale = "en")
#' ```
#'
#' This is often useful as a preliminary step before generating content intended
#' for humans, such as an HTML table.
#'
#' ## Legacy behavior
#'
#' Prior to dplyr 1.1.0, character vector grouping columns were ordered in the
#' system locale. If you need to temporarily revert to this behavior, you can
#' set the global option `dplyr.legacy_locale` to `TRUE`, but this should be
#' used sparingly and you should expect this option to be removed in a future
#' version of dplyr. It is better to update existing code to explicitly call
#' `arrange(.locale = )` instead. Note that setting `dplyr.legacy_locale` will
#' also force calls to [arrange()] to use the system locale.
#'
#' @export
#' @examples
#' by_cyl <- mtcars %>% group_by(cyl)
#'
#' # grouping doesn't change how the data looks (apart from listing
#' # how it's grouped):
#' by_cyl
#'
#' # It changes how it acts with the other dplyr verbs:
#' by_cyl %>% summarise(
#'   disp = mean(disp),
#'   hp = mean(hp)
#' )
#' by_cyl %>% filter(disp == max(disp))
#'
#' # Each call to summarise() removes a layer of grouping
#' by_vs_am <- mtcars %>% group_by(vs, am)
#' by_vs <- by_vs_am %>% summarise(n = n())
#' by_vs
#' by_vs %>% summarise(n = sum(n))
#'
#' # To remove grouping, use ungroup
#' by_vs %>%
#'   ungroup() %>%
#'   summarise(n = sum(n))
#'
#' # By default, group_by() overrides existing grouping
#' by_cyl %>%
#'   group_by(vs, am) %>%
#'   group_vars()
#'
#' # Use .add = TRUE to instead append
#' by_cyl %>%
#'   group_by(vs, am, .add = TRUE) %>%
#'   group_vars()
#'
#' # You can group by expressions: this is a short-hand
#' # for a mutate() followed by a group_by()
#' mtcars %>%
#'   group_by(vsam = vs + am)
#'
#' # The implicit mutate() step is always performed on the
#' # ungrouped data. Here we get 3 groups:
#' mtcars %>%
#'   group_by(vs) %>%
#'   group_by(hp_cut = cut(hp, 3))
#'
#' # If you want it to be performed by groups,
#' # you have to use an explicit mutate() call.
#' # Here we get 3 groups per value of vs
#' mtcars %>%
#'   group_by(vs) %>%
#'   mutate(hp_cut = cut(hp, 3)) %>%
#'   group_by(hp_cut)
#'
#' # when factors are involved and .drop = FALSE, groups can be empty
#' tbl <- tibble(
#'   x = 1:10,
#'   y = factor(rep(c("a", "c"), each = 5), levels = c("a", "b", "c"))
#' )
#' tbl %>%
#'   group_by(y, .drop = FALSE) %>%
#'   group_rows()
#'
group_by <- function(.data,
                     ...,
                     .add = FALSE,
                     .drop = group_by_drop_default(.data)) {
  UseMethod("group_by")
}

#' @export
group_by.data.frame <- function(.data,
                                ...,
                                .add = FALSE,
                                .drop = group_by_drop_default(.data)) {
  # Resolve `...` (adding computed columns when expressions are supplied),
  # then build the grouped_df from the combined grouping variables.
  groups <- group_by_prepare(
    .data,
    ...,
    .add = .add,
    error_call = current_env()
  )
  grouped_df(groups$data, groups$group_names, .drop)
}

#' @rdname group_by
#' @export
#' @param x A [tbl()]
ungroup <- function(x, ...) {
  UseMethod("ungroup")
}

#' @export
ungroup.grouped_df <- function(x, ...) {
  if (missing(...)) {
    # No columns supplied: drop all grouping.
    as_tibble(x)
  } else {
    # Remove only the selected grouping columns and regroup by the rest.
    old_groups <- group_vars(x)
    to_remove <- tidyselect::eval_select(
      expr = expr(c(...)),
      data = x,
      allow_rename = FALSE
    )
    to_remove <- names(to_remove)

    new_groups <- setdiff(old_groups, to_remove)
    group_by(x, !!!syms(new_groups))
  }
}

#' @export
ungroup.rowwise_df <- function(x, ...) {
  check_dots_empty()
  as_tibble(x)
}

#' @export
ungroup.data.frame <- function(x, ...) {
  check_dots_empty()
  x
}

#' Prepare for grouping.
#'
#' `*_prepare()` performs standard manipulation that is needed prior
#' to actual data processing. They are only needed by packages
#' that implement dplyr backends.
#' #' @return A list #' \item{data}{Modified tbl} #' \item{groups}{Modified groups} #' @export #' @keywords internal group_by_prepare <- function(.data, ..., .add = FALSE, .dots = deprecated(), add = deprecated(), error_call = caller_env()) { error_call <- dplyr_error_call(error_call) if (!missing(add)) { lifecycle::deprecate_warn("1.0.0", "group_by(add = )", "group_by(.add = )", always = TRUE) .add <- add } new_groups <- enquos(..., .ignore_empty = "all") if (!missing(.dots)) { # Used by dbplyr 1.4.2 so can't aggressively deprecate lifecycle::deprecate_warn("1.0.0", "group_by(.dots = )", always = TRUE) new_groups <- c(new_groups, compat_lazy_dots(.dots, env = caller_env(2))) } # If any calls, use mutate to add new columns, then group by those computed_columns <- add_computed_columns(.data, new_groups, error_call = error_call) out <- computed_columns$data group_names <- computed_columns$added_names if (.add) { group_names <- union(group_vars(.data), group_names) } unknown <- setdiff(group_names, tbl_vars(out)) if (length(unknown) > 0) { bullets <- c( "Must group by variables found in `.data`.", x = glue("Column `{unknown}` is not found.") ) abort(bullets, call = error_call) } list( data = out, groups = syms(group_names), group_names = group_names ) } add_computed_columns <- function(.data, vars, error_call = caller_env()) { is_symbol <- map_lgl(vars, quo_is_variable_reference) needs_mutate <- have_name(vars) | !is_symbol if (any(needs_mutate)) { # TODO: use less of a hack if (inherits(.data, "data.frame")) { bare_data <- ungroup(.data) by <- compute_by(by = NULL, data = bare_data) cols <- mutate_cols( bare_data, dplyr_quosures(!!!vars), by = by, error_call = error_call ) out <- dplyr_col_modify(.data, cols) col_names <- names(cols) } else { out <- mutate(.data, !!!vars) col_names <- names(exprs_auto_name(vars)) } } else { out <- .data col_names <- names(exprs_auto_name(vars)) } list(data = out, added_names = col_names) } quo_is_variable_reference <- function(quo) { 
if (quo_is_symbol(quo)) { return(TRUE) } if (quo_is_call(quo, n = 2)) { expr <- quo_get_expr(quo) if (is_call(expr, c("$", "[["))) { if (!identical(expr[[2]], sym(".data"))) { return(FALSE) } param <- expr[[3]] if (is_symbol(param) || is_string(param)) { return(TRUE) } } } FALSE } #' Default value for .drop argument of group_by #' #' @param .tbl A data frame #' #' @return `TRUE` unless `.tbl` is a grouped data frame that was previously #' obtained by `group_by(.drop = FALSE)` #' #' @examples #' group_by_drop_default(iris) #' #' iris %>% #' group_by(Species) %>% #' group_by_drop_default() #' #' iris %>% #' group_by(Species, .drop = FALSE) %>% #' group_by_drop_default() #' #' @keywords internal #' @export group_by_drop_default <- function(.tbl) { UseMethod("group_by_drop_default") } #' @export group_by_drop_default.default <- function(.tbl) { TRUE } #' @export group_by_drop_default.grouped_df <- function(.tbl) { tryCatch({ !identical(attr(group_data(.tbl), ".drop"), FALSE) }, error = function(e){ TRUE }) } dplyr/R/reexport-tibble.R0000644000176200001440000000071214366556340015053 0ustar liggesusers#' @importFrom tibble data_frame #' @export tibble::data_frame #' @importFrom tibble as_data_frame #' @export tibble::as_data_frame #' @importFrom tibble lst #' @export tibble::lst #' @importFrom tibble add_row #' @export tibble::add_row #' @importFrom tibble tribble #' @export tibble::tribble #' @importFrom tibble tibble #' @export tibble::tibble #' @importFrom tibble as_tibble #' @export tibble::as_tibble #' @importFrom tibble view tibble::view dplyr/R/locale.R0000644000176200001440000000717714366556340013217 0ustar liggesusers#' Locale used by `arrange()` #' #' @description #' This page documents details about the locale used by [arrange()] when #' ordering character vectors. #' #' ## Default locale #' #' The default locale used by `arrange()` is the C locale. This is used when #' `.locale = NULL` unless the `dplyr.legacy_locale` global option is set to #' `TRUE`. 
You can also force the C locale to be used unconditionally with #' `.locale = "C"`. #' #' The C locale is not exactly the same as English locales, such as `"en"`. The #' main difference is that the C locale groups the English alphabet by _case_, #' while most English locales group the alphabet by _letter_. For example, #' `c("a", "b", "C", "B", "c")` will sort as `c("B", "C", "a", "b", "c")` in the #' C locale, with all uppercase letters coming before lowercase letters, but #' will sort as `c("a", "b", "B", "c", "C")` in an English locale. This often #' makes little practical difference during data analysis, because both return #' identical results when case is consistent between observations. #' #' ## Reproducibility #' #' The C locale has the benefit of being completely reproducible across all #' supported R versions and operating systems with no extra effort. #' #' If you set `.locale` to an option from [stringi::stri_locale_list()], then #' stringi must be installed by anyone who wants to run your code. If you #' utilize this in a package, then stringi should be placed in `Imports`. #' #' ## Legacy behavior #' #' Prior to dplyr 1.1.0, character columns were ordered in the system locale. If #' you need to temporarily revert to this behavior, you can set the global #' option `dplyr.legacy_locale` to `TRUE`, but this should be used sparingly and #' you should expect this option to be removed in a future version of dplyr. It #' is better to update existing code to explicitly use `.locale` instead. Note #' that setting `dplyr.legacy_locale` will also force calls to [group_by()] to #' use the system locale when internally ordering the groups. #' #' Setting `.locale` will override any usage of `dplyr.legacy_locale`. 
#' #' @name dplyr-locale #' @keywords internal #' @examplesIf dplyr:::has_minimum_stringi() #' df <- tibble(x = c("a", "b", "C", "B", "c")) #' df #' #' # Default locale is C, which groups the English alphabet by case, placing #' # uppercase letters before lowercase letters. #' arrange(df, x) #' #' # The American English locale groups the alphabet by letter. #' # Explicitly override `.locale` with `"en"` for this ordering. #' arrange(df, x, .locale = "en") #' #' # This Danish letter is expected to sort after `z` #' df <- tibble(x = c("o", "p", "\u00F8", "z")) #' df #' #' # The American English locale sorts it right after `o` #' arrange(df, x, .locale = "en") #' #' # Using `"da"` for Danish ordering gives the expected result #' arrange(df, x, .locale = "da") #' #' # If you need the legacy behavior of `arrange()`, which respected the #' # system locale, then you can set the global option `dplyr.legacy_locale`, #' # but expect this to be removed in the future. We recommend that you use #' # the `.locale` argument instead. #' rlang::with_options(dplyr.legacy_locale = TRUE, { #' arrange(df, x) #' }) NULL dplyr_legacy_locale <- function() { # Used to determine if `group_by()` and `arrange()` should use # base R's `order()` for sorting, which respects the system locale and was # our sorting engine pre-1.1.0. out <- peek_option("dplyr.legacy_locale") %||% FALSE if (!is_bool(out)) { abort( "Global option `dplyr.legacy_locale` must be a single `TRUE` or `FALSE`.", call = NULL ) } out } has_minimum_stringi <- function() { is_installed("stringi", version = "1.5.3") } dplyr/R/recode.R0000644000176200001440000002574714366556340013224 0ustar liggesusers#' Recode values #' #' @description #' `r lifecycle::badge("superseded")` #' #' `recode()` is superseded in favor of [case_match()], which handles the most #' important cases of `recode()` with a more elegant interface. 
#' `recode_factor()` is also superseded, however, its direct replacement is not #' currently available but will eventually live in #' [forcats](https://forcats.tidyverse.org/). For creating new variables based #' on logical vectors, use [if_else()]. For even more complicated criteria, use #' [case_when()]. #' #' `recode()` is a vectorised version of [switch()]: you can replace numeric #' values based on their position or their name, and character or factor values #' only by their name. This is an S3 generic: dplyr provides methods for #' numeric, character, and factors. You can use `recode()` directly with #' factors; it will preserve the existing order of levels while changing the #' values. Alternatively, you can use `recode_factor()`, which will change the #' order of levels to match the order of replacements. #' #' @param .x A vector to modify #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Replacements. For character and factor `.x`, these should be named #' and replacement is based only on their name. For numeric `.x`, these can be #' named or not. If not named, the replacement is done based on position i.e. #' `.x` represents positions to look for in replacements. See examples. #' #' When named, the argument names should be the current values to be replaced, and the #' argument values should be the new (replacement) values. #' #' All replacements must be the same type, and must have either #' length one or the same length as `.x`. #' @param .default If supplied, all values not otherwise matched will #' be given this value. If not supplied and if the replacements are #' the same type as the original values in `.x`, unmatched #' values are not changed. If not supplied and if the replacements #' are not compatible, unmatched values are replaced with `NA`. #' #' `.default` must be either length 1 or the same length as #' `.x`. #' @param .missing If supplied, any missing values in `.x` will be #' replaced by this value. 
Must be either length 1 or the same length as #' `.x`. #' @param .ordered If `TRUE`, `recode_factor()` creates an #' ordered factor. #' @return A vector the same length as `.x`, and the same type as #' the first of `...`, `.default`, or `.missing`. #' `recode_factor()` returns a factor whose levels are in the same order as #' in `...`. The levels in `.default` and `.missing` come last. #' @seealso [na_if()] to replace specified values with a `NA`. #' #' [coalesce()] to replace missing values with a specified value. #' #' [tidyr::replace_na()] to replace `NA` with a value. #' @export #' @examples #' char_vec <- sample(c("a", "b", "c"), 10, replace = TRUE) #' #' # `recode()` is superseded by `case_match()` #' recode(char_vec, a = "Apple", b = "Banana") #' case_match(char_vec, "a" ~ "Apple", "b" ~ "Banana", .default = char_vec) #' #' # With `case_match()`, you don't need typed missings like `NA_character_` #' recode(char_vec, a = "Apple", b = "Banana", .default = NA_character_) #' case_match(char_vec, "a" ~ "Apple", "b" ~ "Banana", .default = NA) #' #' # Throws an error as `NA` is logical, not character. 
#' try(recode(char_vec, a = "Apple", b = "Banana", .default = NA))
#'
#' # `case_match()` is easier to use with numeric vectors, because you don't
#' # need to turn the numeric values into names
#' num_vec <- c(1:4, NA)
#' recode(num_vec, `2` = 20L, `4` = 40L)
#' case_match(num_vec, 2 ~ 20, 4 ~ 40, .default = num_vec)
#'
#' # `case_match()` doesn't have the ability to match by position like
#' # `recode()` does with numeric vectors
#' recode(num_vec, "a", "b", "c", "d")
#' recode(c(1,5,3), "a", "b", "c", "d", .default = "nothing")
#'
#' # For `case_match()`, incompatible types are an error rather than a warning
#' recode(num_vec, `2` = "b", `4` = "d")
#' try(case_match(num_vec, 2 ~ "b", 4 ~ "d", .default = num_vec))
#'
#' # The factor method of `recode()` can generally be replaced with
#' # `forcats::fct_recode()`
#' factor_vec <- factor(c("a", "b", "c"))
#' recode(factor_vec, a = "Apple")
#'
#' # `recode_factor()` does not currently have a direct replacement, but we
#' # plan to add one to forcats. In the meantime, you can use the `.ptype`
#' # argument to `case_match()`.
#' recode_factor(
#'   num_vec,
#'   `1` = "z",
#'   `2` = "y",
#'   `3` = "x",
#'   .default = "D",
#'   .missing = "M"
#' )
#' case_match(
#'   num_vec,
#'   1 ~ "z",
#'   2 ~ "y",
#'   3 ~ "x",
#'   NA ~ "M",
#'   .default = "D",
#'   .ptype = factor(levels = c("z", "y", "x", "D", "M"))
#' )
recode <- function(.x, ..., .default = NULL, .missing = NULL) {
  # Superseded in dplyr 1.1.0
  lifecycle::signal_stage("superseded", "recode()", "case_match()")
  UseMethod("recode")
}

#' @export
recode.numeric <- function(.x, ..., .default = NULL, .missing = NULL) {
  values <- list2(...)

  nms <- have_name(values)
  if (all(nms)) {
    # All replacements are named: match `.x` against the names coerced to
    # doubles, e.g. `recode(x, \`2\` = 20)`.
    vals <- as.double(names(values))
  } else if (all(!nms)) {
    # No replacements are named: match by position, e.g. the i-th value
    # replaces elements of `.x` equal to `i`.
    vals <- seq_along(values)
  } else {
    msg <- "Either all values must be named, or none must be named."
    abort(msg)
  }

  n <- length(.x)
  template <- find_template(values, .default, .missing)
  # Seed the output with an all-NA vector carrying the template's type/class.
  out <- template[rep(NA_integer_, n)]
  replaced <- rep(FALSE, n)

  # Apply each replacement in turn; later matches overwrite earlier ones.
  for (i in seq_along(values)) {
    out <- replace_with(out, .x == vals[i], values[[i]], paste0("Vector ", i))
    replaced[.x == vals[i]] <- TRUE
  }

  # Fill unreplaced (non-missing) positions with `.default`, then missing
  # positions with `.missing`. A `NULL` value leaves positions untouched.
  .default <- validate_recode_default(.default, .x, out, replaced)
  out <- replace_with(out, !replaced & !is.na(.x), .default, "`.default`")
  out <- replace_with(out, is.na(.x), .missing, "`.missing`")
  out
}

#' @export
recode.character <- function(.x, ..., .default = NULL, .missing = NULL) {
  .x <- as.character(.x)
  values <- list2(...)
  # Character recoding requires every replacement to be named by the value
  # it replaces.
  if (!all(have_name(values))) {
    bad <- which(!have_name(values)) + 1
    msg <- glue("{fmt_pos_args(bad)} must be named.")
    abort(msg)
  }

  n <- length(.x)
  template <- find_template(values, .default, .missing)
  # Seed the output with an all-NA vector carrying the template's type/class.
  out <- template[rep(NA_integer_, n)]
  replaced <- rep(FALSE, n)

  for (nm in names(values)) {
    out <- replace_with(out, .x == nm, values[[nm]], paste0("`", nm, "`"))
    replaced[.x == nm] <- TRUE
  }

  .default <- validate_recode_default(.default, .x, out, replaced)
  out <- replace_with(out, !replaced & !is.na(.x), .default, "`.default`")
  out <- replace_with(out, is.na(.x), .missing, "`.missing`")
  out
}

#' @export
recode.factor <- function(.x, ..., .default = NULL, .missing = NULL) {
  values <- list2(...)
)
    warn(bullets)
  }

  default
}

# Computes the effective `.default` for `recode()`. Dispatches on the class
# of the input `.x` so factors can fall back to their levels.
recode_default <- function(x, default, out) {
  UseMethod("recode_default")
}

recode_default.default <- function(x, default, out) {
  # With no explicit default, pass the input through unchanged — but only
  # when its type matches the output's type; otherwise keep `NULL` so that
  # unreplaced values become NA (with a warning upstream).
  same_type <- identical(typeof(x), typeof(out))
  if (is.null(default) && same_type) {
    x
  } else {
    default
  }
}

recode_default.factor <- function(x, default, out) {
  if (is.null(default)) {
    if ((is.character(out) || is.factor(out)) && is.factor(x)) {
      # Recoding levels to levels: unreplaced levels keep their old label.
      levels(x)
    } else {
      # Incompatible output type: default to a typed NA.
      out[NA_integer_]
    }
  } else {
    default
  }
}

#' @rdname recode
#' @export
recode_factor <- function(.x, ..., .default = NULL, .missing = NULL,
                          .ordered = FALSE) {
  # Superseded in dplyr 1.1.0
  lifecycle::signal_stage("superseded", "recode_factor()", I("`case_match(.ptype = factor(levels = ))`"))

  values <- list2(...)
  recoded <- recode(.x, !!!values, .default = .default, .missing = .missing)

  # Order the factor levels by the order the replacements were supplied,
  # keeping only levels that actually occur in the recoded result.
  all_levels <- unique(c(values, recode_default(.x, .default, recoded), .missing))
  recoded_levels <- if (is.factor(recoded)) levels(recoded) else unique(recoded)
  levels <- intersect(all_levels, recoded_levels)

  factor(recoded, levels, ordered = .ordered)
}

# ------------------------------------------------------------------------------
# Helpers

# Replaces `x[i]` with `val` (recycled or indexed), after validating that
# `val` is length-, type-, and class-compatible with `x`. A `NULL` `val` is
# a no-op; NA positions in the logical index `i` are treated as FALSE.
replace_with <- function(x, i, val, name, reason = NULL, error_call = caller_env()) {
  if (is.null(val)) {
    return(x)
  }

  check_length(val, x, name, reason, error_call = error_call)
  check_type(val, x, name, error_call = error_call)
  check_class(val, x, name, error_call = error_call)

  i[is.na(i)] <- FALSE

  if (length(val) == 1L) {
    x[i] <- val
  } else {
    x[i] <- val[i]
  }

  x
}

# Formats the length-mismatch error message; returns NULL (invisibly) when
# `length_x` is an acceptable length (1 or `n`).
fmt_check_length_val <- function(length_x, n, header, reason = NULL) {
  if (all(length_x %in% c(1L, n))) {
    return()
  }

  if (is.null(reason)) {
    reason <- ""
  } else {
    reason <- glue(" ({reason})")
  }

  if (n == 1) {
    glue("{header} must be length 1{reason}, not {commas(length_x)}.")
  } else {
    glue("{header} must be length {n}{reason} or one, not {commas(length_x)}.")
  }
}

check_length_val <- function(length_x, n, header, reason = NULL, error_call = caller_env()) {
  msg <- fmt_check_length_val(length_x, n, header, reason)
  if (length(msg)) {
    abort(msg, call = error_call)
  }
}

check_length <- function(x, template, header, reason = NULL, error_call = caller_env()) {
  check_length_val(length(x), length(template), header, reason, error_call = error_call)
}

# Errors unless `x` has the same base type (`typeof()`) as `template`.
check_type <- function(x, template, header, error_call = caller_env()) {
  if (identical(typeof(x), typeof(template))) {
    return()
  }
  msg <- glue("{header} must be {obj_type_friendly(template)}, not {obj_type_friendly(x)}.")
  abort(msg, call = error_call)
}

# Errors unless `x` (when it is an S3/S4 object) has the same class vector
# as `template`. Non-objects are always accepted.
check_class <- function(x, template, header, error_call = caller_env()) {
  if (!is.object(x)) {
    return()
  }
  if (identical(class(x), class(template))) {
    return()
  }
  exp_classes <- fmt_classes(template)
  out_classes <- fmt_classes(x)
  msg <- glue("{header} must have class `{exp_classes}`, not class `{out_classes}`.")
  abort(msg, call = error_call)
}
dplyr/R/consecutive-id.R0000644000176200001440000000165614366556340014675 0ustar liggesusers#' Generate a unique identifier for consecutive combinations
#'
#' `consecutive_id()` generates a unique identifier that increments every time
#' a variable (or combination of variables) changes. Inspired by
#' `data.table::rleid()`.
#'
#' @inheritParams n_distinct
#' @returns A numeric vector the same length as the longest
#'   element of `...`.
#' @export
#' @examples
#' consecutive_id(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, NA, NA))
#' consecutive_id(c(1, 1, 1, 2, 1, 1, 2, 2))
#'
#' df <- data.frame(x = c(0, 0, 1, 0), y = c(2, 2, 2, 2))
#' df %>% group_by(x, y) %>% summarise(n = n())
#' df %>% group_by(id = consecutive_id(x, y), x, y) %>% summarise(n = n())
consecutive_id <- function(...) {
  check_dots_unnamed()

  # Combine the inputs into a single data frame (recycling to a common
  # size), then identify runs row-wise with vctrs.
  data <- df_list(
    ...,
    .unpack = FALSE,
    .name_repair = "minimal",
    .error_call = current_env()
  )
  data <- new_data_frame(data)

  out <- vec_identify_runs(data)
  # `vec_identify_runs()` attaches the number of runs as an attribute;
  # drop it so a plain integer vector is returned.
  attr(out, "n") <- NULL

  out
}
dplyr/R/across.R0000644000176200001440000007371114406402754013241 0ustar liggesusers#' Apply a function (or functions) across multiple columns
#'
#' @description
#' `across()` makes it easy to apply the same transformation to multiple
#' columns, allowing you to use [select()] semantics inside in "data-masking"
#' functions like [summarise()] and [mutate()]. See `vignette("colwise")` for
#' more details.
#'
#' `if_any()` and `if_all()` apply the same
#' predicate function to a selection of columns and combine the
#' results into a single logical vector: `if_any()` is `TRUE` when
#' the predicate is `TRUE` for *any* of the selected columns, `if_all()`
#' is `TRUE` when the predicate is `TRUE` for *all* selected columns.
#'
#' If you just need to select columns without applying a transformation to each
#' of them, then you probably want to use [pick()] instead.
#'
#' `across()` supersedes the family of "scoped variants" like
#' `summarise_at()`, `summarise_if()`, and `summarise_all()`.
#'
#' @param .cols <[`tidy-select`][dplyr_tidy_select]> Columns to transform.
#'   You can't select grouping columns because they are already automatically
#'   handled by the verb (i.e. [summarise()] or [mutate()]).
#' @param .fns Functions to apply to each of the selected columns.
#'   Possible values are:
#'
#'   - A function, e.g. `mean`.
#'   - A purrr-style lambda, e.g. `~ mean(.x, na.rm = TRUE)`
#'   - A named list of functions or lambdas, e.g.
#'     `list(mean = mean, n_miss = ~ sum(is.na(.x))`. Each function is applied
#'     to each column, and the output is named by combining the function name
#'     and the column name using the glue specification in `.names`.
#' #' Within these functions you can use [cur_column()] and [cur_group()] #' to access the current column and grouping keys respectively. #' @param ... `r lifecycle::badge("deprecated")` #' #' Additional arguments for the function calls in `.fns` are no longer #' accepted in `...` because it's not clear when they should be evaluated: #' once per `across()` or once per group? Instead supply additional arguments #' directly in `.fns` by using a lambda. For example, instead of #' `across(a:b, mean, na.rm = TRUE)` write #' `across(a:b, ~ mean(.x, na.rm = TRUE))`. #' @param .names A glue specification that describes how to name the output #' columns. This can use `{.col}` to stand for the selected column name, and #' `{.fn}` to stand for the name of the function being applied. The default #' (`NULL`) is equivalent to `"{.col}"` for the single function case and #' `"{.col}_{.fn}"` for the case where a list is used for `.fns`. #' @param .unpack `r lifecycle::badge("experimental")` #' #' Optionally [unpack][tidyr::unpack()] data frames returned by functions in #' `.fns`, which expands the df-columns out into individual columns, retaining #' the number of rows in the data frame. #' #' - If `FALSE`, the default, no unpacking is done. #' - If `TRUE`, unpacking is done with a default glue specification of #' `"{outer}_{inner}"`. #' - Otherwise, a single glue specification can be supplied to describe how to #' name the unpacked columns. This can use `{outer}` to refer to the name #' originally generated by `.names`, and `{inner}` to refer to the names of #' the data frame you are unpacking. #' #' @returns #' `across()` typically returns a tibble with one column for each column in #' `.cols` and each function in `.fns`. If `.unpack` is used, more columns may #' be returned depending on how the results of `.fns` are unpacked. #' #' `if_any()` and `if_all()` return a logical vector. 
#' #' @section Timing of evaluation: #' R code in dplyr verbs is generally evaluated once per group. #' Inside `across()` however, code is evaluated once for each #' combination of columns and groups. If the evaluation timing is #' important, for example if you're generating random variables, think #' about when it should happen and place your code in consequence. #' #' ```{r} #' gdf <- #' tibble(g = c(1, 1, 2, 3), v1 = 10:13, v2 = 20:23) %>% #' group_by(g) #' #' set.seed(1) #' #' # Outside: 1 normal variate #' n <- rnorm(1) #' gdf %>% mutate(across(v1:v2, ~ .x + n)) #' #' # Inside a verb: 3 normal variates (ngroup) #' gdf %>% mutate(n = rnorm(1), across(v1:v2, ~ .x + n)) #' #' # Inside `across()`: 6 normal variates (ncol * ngroup) #' gdf %>% mutate(across(v1:v2, ~ .x + rnorm(1))) #' ``` #' #' @examples #' # For better printing #' iris <- as_tibble(iris) #' #' # across() ----------------------------------------------------------------- #' # Different ways to select the same set of columns #' # See for details #' iris %>% #' mutate(across(c(Sepal.Length, Sepal.Width), round)) #' iris %>% #' mutate(across(c(1, 2), round)) #' iris %>% #' mutate(across(1:Sepal.Width, round)) #' iris %>% #' mutate(across(where(is.double) & !c(Petal.Length, Petal.Width), round)) #' #' # Using an external vector of names #' cols <- c("Sepal.Length", "Petal.Width") #' iris %>% #' mutate(across(all_of(cols), round)) #' #' # If the external vector is named, the output columns will be named according #' # to those names #' names(cols) <- tolower(cols) #' iris %>% #' mutate(across(all_of(cols), round)) #' #' # A purrr-style formula #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), ~ mean(.x, na.rm = TRUE))) #' #' # A named list of functions #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd))) #' #' # Use the .names argument to control the output names #' iris %>% #' group_by(Species) %>% #' 
summarise(across(starts_with("Sepal"), mean, .names = "mean_{.col}")) #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), list(mean = mean, sd = sd), .names = "{.col}.{.fn}")) #' #' # If a named external vector is used for column selection, .names will use #' # those names when constructing the output names #' iris %>% #' group_by(Species) %>% #' summarise(across(all_of(cols), mean, .names = "mean_{.col}")) #' #' # When the list is not named, .fn is replaced by the function's position #' iris %>% #' group_by(Species) %>% #' summarise(across(starts_with("Sepal"), list(mean, sd), .names = "{.col}.fn{.fn}")) #' #' # When the functions in .fns return a data frame, you typically get a #' # "packed" data frame back #' quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { #' tibble(quantile = probs, value = quantile(x, probs)) #' } #' #' iris %>% #' reframe(across(starts_with("Sepal"), quantile_df)) #' #' # Use .unpack to automatically expand these packed data frames into their #' # individual columns #' iris %>% #' reframe(across(starts_with("Sepal"), quantile_df, .unpack = TRUE)) #' #' # .unpack can utilize a glue specification if you don't like the defaults #' iris %>% #' reframe(across(starts_with("Sepal"), quantile_df, .unpack = "{outer}.{inner}")) #' #' # This is also useful inside mutate(), for example, with a multi-lag helper #' multilag <- function(x, lags = 1:3) { #' names(lags) <- as.character(lags) #' purrr::map_dfr(lags, lag, x = x) #' } #' #' iris %>% #' group_by(Species) %>% #' mutate(across(starts_with("Sepal"), multilag, .unpack = TRUE)) %>% #' select(Species, starts_with("Sepal")) #' #' # if_any() and if_all() ---------------------------------------------------- #' iris %>% #' filter(if_any(ends_with("Width"), ~ . > 4)) #' iris %>% #' filter(if_all(ends_with("Width"), ~ . 
> 2))
#'
#' @export
#' @seealso [c_across()] for a function that returns a vector
across <- function(.cols, .fns, ..., .names = NULL, .unpack = FALSE) {
  mask <- peek_mask()
  caller_env <- caller_env()

  # When called via `if_any()`/`if_all()`, context carries the user-facing
  # function name and frame for error reporting.
  across_if_fn <- context_peek_bare("across_if_fn") %||% "across"
  error_call <- context_peek_bare("across_frame") %||% current_env()

  .cols <- enquo(.cols)
  fns_quo <- enquo(.fns)
  fns_quo_env <- quo_get_env(fns_quo)

  if (quo_is_missing(.cols)) {
    across_missing_cols_deprecate_warn()
    .cols <- quo_set_expr(.cols, expr(everything()))
  }

  if (is_missing(.fns)) {
    # Silent restoration to old defaults of `.fns` for now.
    # TODO: Escalate this to formal deprecation.
    .fns <- NULL

    # Catch if dots are non-empty with no `.fns` supplied.
    # Mainly catches typos, e.g. `.funs` (#6638).
    check_dots_empty0(...)
  } else {
    .fns <- quo_eval_fns(fns_quo, mask = fns_quo_env, error_call = error_call)
  }

  # `.unpack` is either a flag or a glue spec; a string implies `TRUE` with
  # a custom spec.
  if (!is_bool(.unpack) && !is_string(.unpack)) {
    stop_input_type(.unpack, "`TRUE`, `FALSE`, or a single string")
  }
  if (is_string(.unpack)) {
    unpack_spec <- .unpack
    .unpack <- TRUE
  } else {
    unpack_spec <- "{outer}_{inner}"
  }

  setup <- across_setup(
    cols = !!.cols,
    fns = .fns,
    names = .names,
    .caller_env = caller_env,
    mask = mask,
    error_call = error_call,
    across_if_fn = across_if_fn
  )

  if (!missing(...)) {
    details <- c(
      "Supply arguments directly to `.fns` through an anonymous function instead.",
      "",
      " " = "# Previously",
      " " = "across(a:b, mean, na.rm = TRUE)",
      "",
      " " = "# Now",
      " " = "across(a:b, \\(x) mean(x, na.rm = TRUE))"
    )
    lifecycle::deprecate_soft(
      when = "1.1.0",
      what = "across(...)",
      details = details
    )
  }

  vars <- setup$vars
  if (length(vars) == 0L) {
    # Empty selection: return a 1-row, 0-column tibble.
    return(dplyr_new_tibble(list(), size = 1L))
  }
  fns <- setup$fns
  names <- setup$names

  # Restore lexical environments of inlinable lambdas before calling them.
  fns <- map(fns, function(fn) uninline(fn, fns_quo_env))

  if (!length(fns)) {
    # TODO: Deprecate and remove the `.fns = NULL` path in favor of `pick()`
    data <- mask$pick_current(vars)

    if (is.null(names)) {
      return(data)
    } else {
      return(set_names(data, names))
    }
  }

  data <- mask$current_cols(vars)

  n_cols <- length(data)
  n_fns <- length(fns)

  seq_n_cols <- seq_len(n_cols)
  seq_fns <- seq_len(n_fns)

  k <- 1L
  out <- vector("list", n_cols * n_fns)

  # Reset `cur_column()` info on exit
  old_var <- context_peek_bare("column")
  on.exit(context_poke("column", old_var), add = TRUE)

  # Loop in such an order that all functions are applied
  # to a single column before moving on to the next column
  withCallingHandlers(
    for (i in seq_n_cols) {
      var <- vars[[i]]
      col <- data[[i]]

      context_poke("column", var)

      for (j in seq_fns) {
        fn <- fns[[j]]
        out[[k]] <- fn(col, ...)
        k <- k + 1L
      }
    },
    error = function(cnd) {
      bullets <- c(
        glue("Can't compute column `{names[k]}`.")
      )
      abort(bullets, call = error_call, parent = cnd)
    }
  )

  # Recycle all results to a common size and assemble the output tibble.
  size <- vec_size_common(!!!out)
  out <- vec_recycle_common(!!!out, .size = size)
  names(out) <- names
  out <- dplyr_new_tibble(out, size = size)

  if (.unpack) {
    out <- df_unpack(out, unpack_spec, caller_env)
  }

  out
}

#' @rdname across
#' @export
if_any <- function(.cols, .fns, ..., .names = NULL) {
  context_local("across_if_fn", "if_any")
  context_local("across_frame", current_env())
  if_across(`|`, across({{ .cols }}, .fns, ..., .names = .names))
}

#' @rdname across
#' @export
if_all <- function(.cols, .fns, ..., .names = NULL) {
  context_local("across_if_fn", "if_all")
  context_local("across_frame", current_env())
  if_across(`&`, across({{ .cols }}, .fns, ..., .names = .names))
}

# Reduces the columns of `df` with the binary operator `op` (`|` for
# `if_any()`, `&` for `if_all()`). An empty selection selects all rows.
if_across <- function(op, df) {
  n <- nrow(df)
  # NOTE(review): `n` is assigned but not used below — presumably a leftover.

  if (!length(df)) {
    return(TRUE)
  }

  combine <- function(x, y) {
    if (is_null(x)) {
      y
    } else {
      op(x, y)
    }
  }
  reduce(df, combine, .init = NULL)
}

#' Combine values from multiple columns
#'
#' @description
#' `c_across()` is designed to work with [rowwise()] to make it easy to
#' perform row-wise aggregations. It has two differences from `c()`:
#'
#' * It uses tidy select semantics so you can easily select multiple variables.
#'   See `vignette("rowwise")` for more details.
#'
#' * It uses [vctrs::vec_c()] in order to give safer outputs.
#'
#' @inheritParams across
#' @seealso [across()] for a function that returns a tibble.
#' @export
#' @examples
#' df <- tibble(id = 1:4, w = runif(4), x = runif(4), y = runif(4), z = runif(4))
#' df %>%
#'   rowwise() %>%
#'   mutate(
#'     sum = sum(c_across(w:z)),
#'     sd = sd(c_across(w:z))
#'   )
c_across <- function(cols) {
  mask <- peek_mask()

  cols <- enquo(cols)
  if (quo_is_missing(cols)) {
    c_across_missing_cols_deprecate_warn()
    cols <- quo_set_expr(cols, expr(everything()))
  }

  vars <- c_across_setup(!!cols, mask = mask)

  cols <- mask$current_cols(vars)
  vec_c(!!!cols, .name_spec = zap())
}

# Builds the environment used to evaluate `.names` glue specs, exposing the
# current column as `.col` and function name as `.fn`.
across_glue_mask <- function(.col, .fn, .caller_env) {
  glue_mask <- env(.caller_env, .col = .col, .fn = .fn)
  # TODO: we can make these bindings louder later
  env_bind_active(
    glue_mask,
    col = function() glue_mask$.col,
    fn = function() glue_mask$.fn
  )
  glue_mask
}

# Shared setup for `across()` evaluation and expansion: resolves the column
# selection, normalises `fns` to a named list, and computes output names.
# Returns `list(vars =, fns =, names =)`.
across_setup <- function(cols,
                        fns,
                        names,
                        .caller_env,
                        mask,
                        error_call = caller_env(),
                        across_if_fn = "across") {
  cols <- enquo(cols)

  # `across()` is evaluated in a data mask so we need to remove the
  # mask layer from the quosure environment (#5460)
  cols <- quo_set_env_to_data_mask_top(cols)

  # TODO: call eval_select with a calling handler to intercept
  # classed error, after https://github.com/r-lib/tidyselect/issues/233
  if (is.null(fns) && quo_is_call(cols, "~")) {
    bullets <- c(
      "Must supply a column selection.",
      i = glue("You most likely meant: `{across_if_fn}(everything(), {as_label(cols)})`."),
      i = "The first argument `.cols` selects a set of columns.",
      i = "The second argument `.fns` operates on each selected columns."
    )
    abort(bullets, call = error_call)
  }

  data <- mask$get_current_data(groups = FALSE)

  vars <- tidyselect::eval_select(
    cols,
    data = data,
    error_call = error_call
  )
  names_vars <- names(vars)
  vars <- names(data)[vars]

  if (is.null(fns)) {
    # TODO: Eventually deprecate and remove the `.fns = NULL` path in favor of `pick()`
    if (!is.null(names)) {
      glue_mask <- across_glue_mask(.caller_env, .col = names_vars, .fn = "1")
      names <- vec_as_names(
        glue(names, .envir = glue_mask),
        repair = "check_unique",
        call = error_call
      )
    } else {
      names <- names_vars
    }

    value <- list(vars = vars, fns = fns, names = names)
    return(value)
  }

  # apply `.names` smart default
  if (is.function(fns)) {
    names <- names %||% "{.col}"
    fns <- list("1" = fns)
  } else {
    names <- names %||% "{.col}_{.fn}"
  }

  if (!is.list(fns)) {
    abort("Expected a list.", .internal = TRUE)
  }

  # make sure fns has names, use number to replace unnamed
  if (is.null(names(fns))) {
    names_fns <- seq_along(fns)
  } else {
    names_fns <- names(fns)
    empties <- which(names_fns == "")
    if (length(empties)) {
      names_fns[empties] <- empties
    }
  }

  glue_mask <- across_glue_mask(.caller_env,
    .col = rep(names_vars, each = length(fns)),
    .fn = rep(names_fns, length(vars))
  )
  names <- vec_as_names(
    glue(names, .envir = glue_mask),
    repair = "check_unique",
    call = error_call
  )

  list(
    vars = vars,
    fns = fns,
    names = names
  )
}

uninline <- function(fn, env) {
  # Reset environment of inlinable lambdas which are set to the empty
  # env sentinel
  if (identical(get_env(fn), empty_env())) {
    set_env(fn, env)
  } else {
    fn
  }
}

# FIXME: This pattern should be encapsulated by rlang
# Walks up past tidy-eval data-mask layers to the first non-mask environment.
data_mask_top <- function(env, recursive = FALSE, inherit = FALSE) {
  while (env_has(env, ".__tidyeval_data_mask__.", inherit = inherit)) {
    env <- env_parent(env_get(env, ".top_env", inherit = inherit))
    if (!recursive) {
      return(env)
    }
  }

  env
}

# Rewires a quosure so it no longer inherits from the data mask, preventing
# columns from shadowing functions during evaluation.
quo_set_env_to_data_mask_top <- function(quo) {
  env <- quo_get_env(quo)
  env <- data_mask_top(env, recursive = FALSE, inherit = FALSE)
  quo_set_env(quo, env)
}
c_across_setup <- function(cols, mask, error_call = caller_env()) {
  cols <- enquo(cols)

  # `c_across()` is evaluated in a data mask so we need to remove the
  # mask layer from the quosure environments (same as `across()`) (#5460, #6522)
  cols <- quo_set_env_to_data_mask_top(cols)

  data <- mask$get_current_data(groups = FALSE)

  # Renaming is disallowed because `c_across()` concatenates values rather
  # than building a named container.
  vars <- tidyselect::eval_select(
    expr = cols,
    data = data,
    allow_rename = FALSE,
    error_call = error_call
  )

  value <- names(vars)

  value
}

# Attaches dplyr's bookkeeping metadata (name, named-ness, index, column) to
# a quosure under the "dplyr:::data" attribute.
new_dplyr_quosure <- function(quo, ...) {
  attr(quo, "dplyr:::data") <- list2(...)
  quo
}

dplyr_quosure_name <- function(quo_data) {
  if (quo_data$is_named) {
    # `name` is a user-supplied or known character string
    quo_data$name
  } else {
    # `name` is a quosure that must be auto-named
    with_no_rlang_infix_labeling(as_label(quo_data$name))
  }
}

# Captures `...` as quosures and tags each with dplyr metadata for later
# expansion/naming.
dplyr_quosures <- function(...) {
  # We're using quos() instead of enquos() here for speed, because we're not defusing named arguments --
  # only the ellipsis is converted to quosures, there are no further arguments.
  quosures <- quos(..., .ignore_empty = "all")
  names <- names2(quosures)

  for (i in seq_along(quosures)) {
    quosure <- quosures[[i]]
    name <- names[[i]]
    is_named <- (name != "")
    if (!is_named) {
      # Will be auto-named by `dplyr_quosure_name()` only as needed
      name <- quosure
    }
    quosures[[i]] <- new_dplyr_quosure(
      quo = quosure,
      name = name,
      is_named = is_named,
      index = i
    )
  }

  quosures
}

# When mutate() or summarise() have an unnamed call to across() at the top level, e.g.
#    summarise(across(<...>)) or mutate(across(<...>))
#
# a call to top_across(<...>) is evaluated instead.
# top_across() returns a flattened list of expressions along with some
# information about the "current column" for each expression
# in the "columns" attribute:
#
# For example with: summarise(across(c(x, y), mean, .names = "mean_{.col}"))
# top_across() will return something like:
#
# structure(
#   list(mean_x = expr(mean(x)), mean_y = expr(mean(y)))
#   columns = c("x", "y")
# )

# Technically this always returns a single quosure but we wrap it in a
# list to follow the pattern in `expand_across()`
expand_if_across <- function(quo) {
  quo_data <- attr(quo, "dplyr:::data")
  if (!quo_is_call(quo, c("if_any", "if_all"), ns = c("", "dplyr"))) {
    return(list(quo))
  }

  call <- match.call(
    definition = if_any,
    call = quo_get_expr(quo),
    expand.dots = FALSE,
    envir = quo_get_env(quo)
  )
  # Extra `...` arguments prevent expansion (they must be evaluated per
  # group in the data mask).
  if (!is_null(call$...)) {
    return(list(quo))
  }

  if (is_call(call, "if_any")) {
    op <- "|"
    if_fn <- "if_any"
  } else {
    op <- "&"
    if_fn <- "if_all"
  }

  context_local("across_if_fn", if_fn)

  # Set frame here for backtrace truncation. But override error call
  # via `local_error_call()` so it refers to the function we're
  # expanding, e.g. `if_any()` and not `expand_if_across()`.
  context_local("across_frame", current_env())
  local_error_call(call(if_fn))

  # Rewrite as an `across()` call, expand it, then fold the per-column
  # expressions together with `|` or `&`.
  call[[1]] <- quote(across)
  quos <- expand_across(quo_set_expr(quo, call))

  # Select all rows if there are no inputs
  if (!length(quos)) {
    return(list(quo(TRUE)))
  }

  combine <- function(x, y) {
    if (is_null(x)) {
      y
    } else {
      call(op, x, y)
    }
  }
  expr <- reduce(quos, combine, .init = NULL)

  # Use `as_quosure()` instead of `new_quosure()` to avoid rewrapping
  # quosure in case of single input
  list(as_quosure(expr, env = baseenv()))
}

# Expands an unnamed top-level `across()` call into one quosure per
# (column, function) pair, or returns `list(quo)` untouched when expansion
# is not possible.
expand_across <- function(quo) {
  quo_data <- attr(quo, "dplyr:::data")
  if (!quo_is_call(quo, "across", ns = c("", "dplyr")) || quo_data$is_named) {
    return(list(quo))
  }

  across_if_fn <- context_peek_bare("across_if_fn") %||% "across"

  # Set error call to frame for backtrace truncation, but override
  # call with the relevant function we're doing the expansion for
  error_call <- context_peek_bare("across_frame") %||% current_env()
  local_error_call(call(across_if_fn))

  # Expand dots in lexical env
  env <- quo_get_env(quo)
  expr <- match.call(
    definition = across,
    call = quo_get_expr(quo),
    expand.dots = FALSE,
    envir = env
  )

  # Abort expansion if there are any expression supplied because dots
  # must be evaluated once per group in the data mask. Expanding the
  # `across()` call would lead to either `n_group * n_col` evaluations
  # if dots are delayed or only 1 evaluation if they are eagerly
  # evaluated.
  if (!is_null(expr$...)) {
    return(list(quo))
  }

  dplyr_mask <- peek_mask()
  mask <- dplyr_mask$get_rlang_mask()

  if (".unpack" %in% names(expr)) {
    # We're expanding expressions but we do need some actual values ahead of
    # time. We evaluate those in the mask to simulate masked evaluation of an
    # `across()` call within a verb like `mutate()`. `.names` and `.fns` are
    # also evaluated this way below.
    unpack <- eval_tidy(expr$.unpack, mask, env = env)
  } else {
    unpack <- FALSE
  }

  # Abort expansion if unpacking as expansion makes named expressions and we
  # need the expressions to remain unnamed
  if (!is_false(unpack)) {
    return(list(quo))
  }

  # Differentiate between missing and null (`match.call()` doesn't
  # expand default argument)
  if (".cols" %in% names(expr)) {
    cols <- expr$.cols
  } else {
    across_missing_cols_deprecate_warn()
    cols <- expr(everything())
  }
  cols <- as_quosure(cols, env)

  if (".fns" %in% names(expr)) {
    fns <- as_quosure(expr$.fns, env)
    fns <- quo_eval_fns(fns, mask = mask, error_call = error_call)
  } else {
    # In the missing case, silently restore the old default of `NULL`.
    # TODO: Escalate this to formal deprecation.
    fns <- NULL
  }

  setup <- across_setup(
    !!cols,
    fns = fns,
    names = eval_tidy(expr$.names, mask, env = env),
    .caller_env = env,
    mask = dplyr_mask,
    error_call = error_call,
    across_if_fn = across_if_fn
  )

  vars <- setup$vars

  # Empty expansion
  if (length(vars) == 0L) {
    return(new_expanded_quosures(list()))
  }

  fns <- setup$fns
  names <- setup$names %||% vars

  # No functions, so just return a list of symbols
  if (is.null(fns)) {
    # TODO: Deprecate and remove the `.fns = NULL` path in favor of `pick()`
    expressions <- pmap(list(vars, names, seq_along(vars)), function(var, name, k) {
      quo <- new_quosure(sym(var), empty_env())
      quo <- new_dplyr_quosure(
        quo,
        name = name,
        is_named = TRUE,
        index = c(quo_data$index, k),
        column = var
      )
    })
    names(expressions) <- names
    expressions <- new_expanded_quosures(expressions)
    return(expressions)
  }

  n_vars <- length(vars)
  n_fns <- length(fns)

  seq_vars <- seq_len(n_vars)
  seq_fns <- seq_len(n_fns)

  expressions <- vector(mode = "list", n_vars * n_fns)
  columns <- character(n_vars * n_fns)
  # NOTE(review): `columns` is allocated but not assigned below — presumably
  # a leftover from the "columns" attribute described in the comment above.

  k <- 1L
  for (i in seq_vars) {
    var <- vars[[i]]

    for (j in seq_fns) {
      fn_call <- as_across_fn_call(fns[[j]], var, env, mask)

      name <- names[[k]]

      expressions[[k]] <- new_dplyr_quosure(
        fn_call,
        name = name,
        is_named = TRUE,
        index = c(quo_data$index, k),
        column = var
      )

      k <- k + 1L
    }
  }

  names(expressions) <- names

  new_expanded_quosures(expressions)
}

new_expanded_quosures <- function(x) {
  structure(x, class = "dplyr_expanded_quosures")
}

# Turns one element of `.fns` into the call that will be evaluated for the
# column `var`: inlinable lambdas have their body substituted directly,
# everything else is wrapped as `fn(var)`.
as_across_fn_call <- function(fn, var, env, mask) {
  if (is_inlinable_lambda(fn)) {
    # Transform inlinable lambdas to simple quosured calls
    arg <- names(formals(fn))[[1]]
    expr <- body(fn)
    expr <- expr_substitute(expr, sym(arg), sym(var))
    new_quosure(expr, env)
  } else {
    # Non-inlinable elements are wrapped in a quosured call. It's
    # important that these are set to their original quosure
    # environment (passed as `env`) because we change non-inlinable
    # lambdas to inherit from the data mask in order to make them
    # maskable. By wrapping them in a quosured call that inherits from
    # the original quosure environment that wrapped the expanded
    # `across()` call, we cause `eval_tidy()` to chains this
    # environment to the top of the data mask, thereby preserving the
    # lexical environment of the lambda when it is evaluated.
    new_quosure(call2(fn, sym(var)), env)
  }
}

# The environment of functions that are safe to inline has been set to
# the empty env sentinel
is_inlinable_lambda <- function(x) {
  is_function(x) && identical(fn_env(x), empty_env())
}

across_missing_cols_deprecate_warn <- function() {
  across_if_fn <- context_peek_bare("across_if_fn") %||% "across"

  # Passing the correct `user_env` through `expand_across()` to here is
  # complicated, so instead we force the global environment. This means users
  # won't ever see the "deprecated feature was likely used in the {pkg}"
  # message, but the warning will still fire and that is more important.
user_env <- global_env()

  lifecycle::deprecate_warn(
    when = "1.1.0",
    what = I(glue("Using `{across_if_fn}()` without supplying `.cols`")),
    details = "Please supply `.cols` instead.",
    user_env = user_env
  )
}

c_across_missing_cols_deprecate_warn <- function(user_env = caller_env(2)) {
  lifecycle::deprecate_warn(
    when = "1.1.0",
    what = I("Using `c_across()` without supplying `cols`"),
    details = "Please supply `cols` instead.",
    user_env = user_env
  )
}

# Expands df-columns of `x` (as produced by `.fns` returning data frames)
# into individual columns, naming them with the glue `spec` which can refer
# to `{outer}` (the packed column's name) and `{inner}` (its inner names).
df_unpack <- function(x, spec, caller_env, error_call = caller_env()) {
  size <- vec_size(x)

  out <- dplyr_new_list(x)
  names <- names(out)

  loc <- which(map_lgl(out, is.data.frame))

  cols <- out[loc]
  col_names <- names[loc]

  out[loc] <- map2(
    .x = cols,
    .y = col_names,
    .f = apply_unpack_spec,
    spec = spec,
    caller_env = caller_env
  )

  # Signal to `df_list()` that these columns should be unpacked
  names[loc] <- ""
  names(out) <- names

  out <- df_list(!!!out, .size = size, .name_repair = "minimal")
  out <- dplyr_new_tibble(out, size = size)

  # Validate (for its error side effect) that the unpacked names are unique.
  vec_as_names(names(out), repair = "check_unique", call = error_call)

  out
}

# Renames the inner columns of one packed df-column according to `spec`.
apply_unpack_spec <- function(col, outer, spec, caller_env) {
  inner <- names(col)
  outer <- vec_rep(outer, times = length(inner))

  mask <- env(caller_env, outer = outer, inner = inner)
  inner <- glue(spec, .envir = mask)
  inner <- as.character(inner)

  names(col) <- inner

  col
}

# Evaluate the quosure of the `.fns` argument
#
# We detect and mark inlinable lambdas here. By lambda we mean either
# a `~` or `function` call that is directly supplied to
# `across()`. Lambdas haven't been evaluated yet and don't carry an
# environment.
#
# Inlinable lambdas are eventually expanded in the surrounding call.
# To distinguish inlinable lambdas from non-inlinable ones, we set
# their environments to the empty env.
#
# There are cases where we can't inline, for instance lambdas that are
# passed additional arguments through `...`. We still want these
# non-inlinable lambdas to be maskable so that they can refer to
# data-mask columns. So we set them (a) in the evaluation case, to
# their original quosure environment which is the data mask, or (b) in
# the expansion case, to the uninitialised data mask.
#
# @value A function/formula, or a list of functions/formulas.
#   Inlinable lambdas are set to the empty env.
quo_eval_fns <- function(quo, mask, error_call = caller_env()) {
  # In the evaluation path (as opposed to expansion), the quosure
  # inherits from the data mask. We set the environment to the data
  # mask top (the original quosure environment) so that we don't
  # evaluate the function expressions in the mask. This prevents
  # masking a function symbol (e.g. `mean`) by a column of the same
  # name.
  quo <- quo_set_env_to_data_mask_top(quo)

  # The following strange scheme is a work around to reconciliate two
  # contradictory goals. We want to evaluate outside the mask so that
  # data mask columns are not confused with functions (#6545).
  # However at the same time we want non-inlinable lambdas (inlinable
  # ones are dealt with above) to be maskable so they can refer to
  # data mask columns. So we evaluate outside the mask, in a data-less
  # quosure mask that handles quosures. Then, in `validate()`, we
  # detect lambdas that inherit from this quosure mask and set their
  # environment to the data mask.
  sentinel_env <- empty_env()
  out <- eval_tidy(quo({
    sentinel_env <<- current_env()
    !!quo
  }))

  validate <- function(x) {
    if (is_formula(x) || is_function(x)) {
      # If the function or formula inherits from the data-less quosure
      # mask, we have a lambda that was directly supplied and
      # evaluated here. We inline it if possible.
      if (identical(get_env(x), sentinel_env)) {
        if (is_inlinable_function(x)) {
          return(set_env(x, empty_env()))
        }
        if (is_inlinable_formula(x)) {
          # Normalise `.` to `.x` and convert the formula to a one-argument
          # function marked inlinable (empty env).
          x <- expr_substitute(x, quote(.), quote(.x))
          fn <- new_function(pairlist2(.x = ), f_rhs(x), empty_env())
          return(fn)
        }

        # Can't inline the lambda. We set its environment to the data
        # mask so it can still refer to columns.
        x <- set_env(x, mask)
      }

      as_function(x, arg = ".fns", call = error_call)
    } else {
      abort(
        "`.fns` must be a function, a formula, or a list of functions/formulas.",
        call = error_call
      )
    }
  }

  if (obj_is_list(out)) {
    map(out, function(elt) validate(elt))
  } else {
    validate(out)
  }
}

is_inlinable_function <- function(x) {
  if (!is_function(x)) {
    return(FALSE)
  }

  fmls <- formals(x)

  # Don't inline if there are additional arguments even if they have
  # defaults or are passed through `...`
  if (length(fmls) != 1) {
    return(FALSE)
  }

  # Don't inline lambdas that call `return()` at the moment a few
  # packages do things like `across(1, function(x)
  # return(x))`. Whereas `eval()` sets a return point, `eval_tidy()`
  # doesn't which causes `return()` to throw an error.
  if ("return" %in% all.names(body(x))) {
    return(FALSE)
  }

  TRUE
}

is_inlinable_formula <- function(x) {
  if (!is_formula(x, lhs = FALSE)) {
    return(FALSE)
  }

  # Don't inline if there are additional arguments passed through `...`
  nms <- all.names(x)
  unsupported_arg_rx <- "\\.\\.[0-9]|\\.y"
  if (any(grepl(unsupported_arg_rx, nms))) {
    return(FALSE)
  }

  # Don't inline lambdas that call `return()` at the moment, see above
  if ("return" %in% nms) {
    return(FALSE)
  }

  TRUE
}
dplyr/R/filter.R0000644000176200001440000002254614406402754013234 0ustar liggesusers#' Keep rows that match a condition
#'
#' The `filter()` function is used to subset a data frame,
#' retaining all rows that satisfy your conditions.
#' To be retained, the row must produce a value of `TRUE` for all conditions.
#' Note that when a condition evaluates to `NA`
#' the row will be dropped, unlike base subsetting with `[`.
#'
#' The `filter()` function is used to subset the rows of
#' `.data`, applying the expressions in `...` to the column values to determine which
#' rows should be retained. It can be applied to both grouped and ungrouped data (see [group_by()] and
#' [ungroup()]).
However, dplyr is not yet smart enough to optimise the filtering #' operation on grouped datasets that do not need grouped calculations. For this #' reason, filtering is often considerably faster on ungrouped data. #' #' @section Useful filter functions: #' #' There are many functions and operators that are useful when constructing the #' expressions used to filter the data: #' #' * [`==`], [`>`], [`>=`] etc #' * [`&`], [`|`], [`!`], [xor()] #' * [is.na()] #' * [between()], [near()] #' #' @section Grouped tibbles: #' #' Because filtering expressions are computed within groups, they may #' yield different results on grouped tibbles. This will be the case #' as soon as an aggregating, lagging, or ranking function is #' involved. Compare this ungrouped filtering: #' #' ``` #' starwars %>% filter(mass > mean(mass, na.rm = TRUE)) #' ``` #' #' With the grouped equivalent: #' #' ``` #' starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) #' ``` #' #' In the ungrouped version, `filter()` compares the value of `mass` in each row to #' the global average (taken over the whole data set), keeping only the rows with #' `mass` greater than this global average. In contrast, the grouped version calculates #' the average mass separately for each `gender` group, and keeps rows with `mass` greater #' than the relevant within-gender average. #' #' @family single table verbs #' @inheritParams arrange #' @inheritParams args_by #' @param ... <[`data-masking`][rlang::args_data_masking]> Expressions that #' return a logical value, and are defined in terms of the variables in #' `.data`. If multiple expressions are included, they are combined with the #' `&` operator. Only rows for which all conditions evaluate to `TRUE` are #' kept. #' @param .preserve Relevant when the `.data` input is grouped. #' If `.preserve = FALSE` (the default), the grouping structure #' is recalculated based on the resulting data, otherwise the grouping is kept as is. 
#' @return #' An object of the same type as `.data`. The output has the following properties: #' #' * Rows are a subset of the input, but appear in the same order. #' * Columns are not modified. #' * The number of groups may be reduced (if `.preserve` is not `TRUE`). #' * Data frame attributes are preserved. #' #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' The following methods are currently available in loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}. #' @export #' @examples #' # Filtering by one criterion #' filter(starwars, species == "Human") #' filter(starwars, mass > 1000) #' #' # Filtering by multiple criteria within a single logical expression #' filter(starwars, hair_color == "none" & eye_color == "black") #' filter(starwars, hair_color == "none" | eye_color == "black") #' #' # When multiple expressions are used, they are combined using & #' filter(starwars, hair_color == "none", eye_color == "black") #' #' #' # The filtering operation may yield different results on grouped #' # tibbles because the expressions are computed within groups. #' # #' # The following filters rows where `mass` is greater than the #' # global average: #' starwars %>% filter(mass > mean(mass, na.rm = TRUE)) #' #' # Whereas this keeps rows with `mass` greater than the gender #' # average: #' starwars %>% group_by(gender) %>% filter(mass > mean(mass, na.rm = TRUE)) #' #' #' # To refer to column names that are stored as strings, use the `.data` pronoun: #' vars <- c("mass", "height") #' cond <- c(80, 150) #' starwars %>% #' filter( #' .data[[vars[[1]]]] > cond[[1]], #' .data[[vars[[2]]]] > cond[[2]] #' ) #' # Learn more in ?rlang::args_data_masking filter <- function(.data, ..., .by = NULL, .preserve = FALSE) { check_by_typo(...) 
by <- enquo(.by) if (!quo_is_null(by) && !is_false(.preserve)) { abort("Can't supply both `.by` and `.preserve`.") } UseMethod("filter") } #' @export filter.data.frame <- function(.data, ..., .by = NULL, .preserve = FALSE) { dots <- dplyr_quosures(...) check_filter(dots) by <- compute_by( by = {{ .by }}, data = .data, by_arg = ".by", data_arg = ".data" ) loc <- filter_rows(.data, dots, by) dplyr_row_slice(.data, loc, preserve = .preserve) } filter_rows <- function(data, dots, by, error_call = caller_env(), user_env = caller_env(2)) { error_call <- dplyr_error_call(error_call) mask <- DataMask$new(data, by, "filter", error_call = error_call) on.exit(mask$forget(), add = TRUE) dots <- filter_expand(dots, mask = mask, error_call = error_call) filter_eval(dots, mask = mask, error_call = error_call, user_env = user_env) } check_filter <- function(dots, error_call = caller_env()) { named <- have_name(dots) for (i in which(named)) { quo <- dots[[i]] # only allow named logical vectors, anything else # is suspicious expr <- quo_get_expr(quo) if (!is.logical(expr)) { name <- names(dots)[i] bullets <- c( "We detected a named input.", i = glue("This usually means that you've used `=` instead of `==`."), i = glue("Did you mean `{name} == {as_label(expr)}`?") ) abort(bullets, call = error_call) } } } filter_expand <- function(dots, mask, error_call = caller_env()) { env_filter <- env() filter_expand_one <- function(dot, index) { env_filter$current_expression <- index dot <- expand_pick(dot, mask) expand_if_across(dot) } local_error_context(dots, i = 0L, mask = mask) dots <- withCallingHandlers( imap(unname(dots), filter_expand_one), error = function(cnd) { poke_error_context(dots, env_filter$current_expression, mask = mask) abort(cnd_bullet_header("expand"), call = error_call, parent = cnd) } ) dots <- list_flatten(dots) new_quosures(dots) } filter_eval <- function(dots, mask, error_call = caller_env(), user_env = caller_env(2)) { env_filter <- env() warnings_state <- 
env(warnings = list()) # For condition handlers i <- NULL env_bind_active( current_env(), "i" = function() env_filter$current_expression ) warning_handler <- dplyr_warning_handler( state = warnings_state, mask = mask, error_call = error_call ) out <- withCallingHandlers( mask$eval_all_filter(dots, env_filter), error = dplyr_error_handler( dots = dots, mask = mask, bullets = filter_bullets, error_call = error_call ), warning = function(cnd) { local_error_context(dots, i, mask) warning_handler(cnd) }, `dplyr:::signal_filter_one_column_matrix` = function(e) { warn_filter_one_column_matrix(env = error_call, user_env = user_env) }, `dplyr:::signal_filter_across` = function(e) { warn_filter_across(env = error_call, user_env = user_env) }, `dplyr:::signal_filter_data_frame` = function(e) { warn_filter_data_frame(env = error_call, user_env = user_env) } ) signal_warnings(warnings_state, error_call) out } filter_bullets <- function(cnd, ...) { UseMethod("filter_bullets") } #' @export `filter_bullets.dplyr:::filter_incompatible_type` <- function(cnd, ...) { column_name <- cnd$dplyr_error_data$column_name index <- cnd$dplyr_error_data$index result <- cnd$dplyr_error_data$result if (is.null(column_name)) { input_name <- glue("..{index}") } else { input_name <- glue("..{index}${column_name}") } glue("`{input_name}` must be a logical vector, not {obj_type_friendly(result)}.") } #' @export `filter_bullets.dplyr:::filter_incompatible_size` <- function(cnd, ...) 
{ index <- cnd$dplyr_error_data$index expected_size <- cnd$dplyr_error_data$expected_size size <- cnd$dplyr_error_data$size glue("`..{index}` must be of size {or_1(expected_size)}, not size {size}.") } warn_filter_one_column_matrix <- function(env, user_env) { lifecycle::deprecate_warn( when = "1.1.0", what = I("Using one column matrices in `filter()`"), with = I("one dimensional logical vectors"), env = env, user_env = user_env ) } warn_filter_across <- function(env, user_env) { lifecycle::deprecate_warn( when = "1.0.8", what = I("Using `across()` in `filter()`"), with = I("`if_any()` or `if_all()`"), always = TRUE, env = env, user_env = user_env ) } warn_filter_data_frame <- function(env, user_env) { lifecycle::deprecate_warn( when = "1.0.8", what = I("Returning data frames from `filter()` expressions"), with = I("`if_any()` or `if_all()`"), always = TRUE, env = env, user_env = user_env ) } dplyr/R/group-split.R0000644000176200001440000000667714366556340014251 0ustar liggesusers#' Split data frame by groups #' #' @description #' `r lifecycle::badge("experimental")` #' #' [group_split()] works like [base::split()] but: #' #' - It uses the grouping structure from [group_by()] and therefore is subject #' to the data mask #' #' - It does not name the elements of the list based on the grouping as this #' only works well for a single character grouping variable. Instead, #' use [group_keys()] to access a data frame that defines the groups. #' #' `group_split()` is primarily designed to work with grouped data frames. #' You can pass `...` to group and split an ungrouped data frame, but this #' is generally not very useful as you want have easy access to the group #' metadata. #' #' @section Lifecycle: #' `group_split()` is not stable because you can achieve very similar results by #' manipulating the nested column returned from #' [`tidyr::nest(.by =)`][tidyr::nest()]. That also retains the group keys all #' within a single data structure. 
`group_split()` may be deprecated in the #' future. #' #' @param .tbl A tbl. #' @param ... If `.tbl` is an ungrouped data frame, a grouping specification, #' forwarded to [group_by()]. #' @param .keep Should the grouping columns be kept? #' @returns A list of tibbles. Each tibble contains the rows of `.tbl` for the #' associated group and all the columns, including the grouping variables. #' Note that this returns a [list_of][vctrs::list_of()] which is slightly #' stricter than a simple list but is useful for representing lists where #' every element has the same type. #' @keywords internal #' @family grouping functions #' @export #' @examples #' ir <- iris %>% group_by(Species) #' #' group_split(ir) #' group_keys(ir) group_split <- function(.tbl, ..., .keep = TRUE) { lifecycle::signal_stage("experimental", "group_split()") UseMethod("group_split") } #' @export group_split.data.frame <- function(.tbl, ..., .keep = TRUE, keep = deprecated()) { if (!missing(keep)) { lifecycle::deprecate_warn("1.0.0", "group_split(keep = )", "group_split(.keep = )", always = TRUE) .keep <- keep } data <- group_by(.tbl, ...) group_split_impl(data, .keep = .keep) } #' @export group_split.rowwise_df <- function(.tbl, ..., .keep = TRUE, keep = deprecated()) { if (dots_n(...)) { warn("... is ignored in group_split(), please use as_tibble() %>% group_split(...)") } if (!missing(keep)) { lifecycle::deprecate_warn("1.0.0", "group_split(keep = )", "group_split(.keep = )", always = TRUE) .keep <- keep } if (!missing(.keep)) { warn(".keep is ignored in group_split()") } group_split_impl(.tbl, .keep = TRUE) } #' @export group_split.grouped_df <- function(.tbl, ..., .keep = TRUE, keep = deprecated()) { if (!missing(keep)) { lifecycle::deprecate_warn("1.0.0", "group_split(keep = )", "group_split(.keep = )", always = TRUE) .keep <- keep } if (dots_n(...)) { warn("... 
is ignored in group_split(), please use group_by(..., .add = TRUE) %>% group_split()") } group_split_impl(.tbl, .keep = .keep) } group_split_impl <- function(data, .keep) { out <- ungroup(data) indices <- group_rows(data) if (!isTRUE(.keep)) { remove <- group_vars(data) .keep <- names(out) .keep <- setdiff(.keep, remove) out <- out[.keep] } dplyr_chop(out, indices) } dplyr_chop <- function(data, indices) { out <- map(indices, dplyr_row_slice, data = data) out <- new_list_of(out, ptype = vec_ptype(data)) out } dplyr/R/coalesce.R0000644000176200001440000000447014525503021013510 0ustar liggesusers#' Find the first non-missing element #' #' Given a set of vectors, `coalesce()` finds the first non-missing value at #' each position. It's inspired by the SQL `COALESCE` function which does the #' same thing for SQL `NULL`s. #' #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> #' #' One or more vectors. These will be #' [recycled][vctrs::theory-faq-recycling] against each other, and will be #' cast to their common type. #' #' @param .ptype An optional prototype declaring the desired output type. If #' supplied, this overrides the common type of the vectors in `...`. #' #' @param .size An optional size declaring the desired output size. If supplied, #' this overrides the common size of the vectors in `...`. #' #' @return A vector with the same type and size as the common type and common #' size of the vectors in `...`. #' #' @seealso [na_if()] to replace specified values with an `NA`. #' [tidyr::replace_na()] to replace `NA` with a value. 
#' #' @export #' @examples #' # Use a single value to replace all missing values #' x <- sample(c(1:5, NA, NA, NA)) #' coalesce(x, 0L) #' #' # The equivalent to a missing value in a list is `NULL` #' coalesce(list(1, 2, NULL), list(NA)) #' #' # Or generate a complete vector from partially missing pieces #' y <- c(1, 2, NA, NA, 5) #' z <- c(NA, NA, 3, 4, 5) #' coalesce(y, z) #' #' # Supply lists by splicing them into dots: #' vecs <- list( #' c(1, 2, NA, NA, 5), #' c(NA, NA, 3, 4, 5) #' ) #' coalesce(!!!vecs) coalesce <- function(..., .ptype = NULL, .size = NULL) { args <- list2(...) if (vec_any_missing(args)) { # Drop `NULL`s not_missing <- !vec_detect_missing(args) args <- vec_slice(args, not_missing) } if (length(args) == 0L) { abort("`...` can't be empty.") } # Recycle early so logical conditions computed below will be the same length, # as required by `vec_case_when()` args <- vec_recycle_common(!!!args, .size = .size) # Name early to get correct indexing in `vec_case_when()` error messages names <- names2(args) names <- names_as_error_names(names) args <- set_names(args, names) conditions <- map(args, ~{ !vec_detect_missing(.x) }) vec_case_when( conditions = conditions, values = args, conditions_arg = "", values_arg = "", ptype = .ptype, size = .size, call = current_env() ) } dplyr/R/desc.R0000644000176200001440000000065214406402754012657 0ustar liggesusers#' Descending order #' #' Transform a vector into a format that will be sorted in descending order. #' This is useful within [arrange()]. 
#' #' @param x vector to transform #' @export #' @examples #' desc(1:10) #' desc(factor(letters)) #' #' first_day <- seq(as.Date("1910/1/1"), as.Date("1920/1/1"), "years") #' desc(first_day) #' #' starwars %>% arrange(desc(mass)) desc <- function(x) { obj_check_vector(x) -xtfrm(x) } dplyr/R/join-common-by.R0000644000176200001440000000340314366556340014601 0ustar liggesusers#' Extract out common by variables #' #' @export #' @keywords internal common_by <- function(by = NULL, x, y) UseMethod("common_by", by) #' @export common_by.character <- function(by, x, y) { by <- common_by_from_vector(by) common_by.list(by, x, y) } common_by_from_vector <- function(by) { by <- by[!duplicated(by)] by_x <- names(by) %||% by by_y <- unname(by) # If x partially named, assume unnamed are the same in both tables by_x[by_x == ""] <- by_y[by_x == ""] list(x = by_x, y = by_y) } #' @export common_by.list <- function(by, x, y) { x_vars <- tbl_vars(x) if (!all(by$x %in% x_vars)) { msg <- glue("`by` can't contain join column {missing} which is missing from LHS.", missing = fmt_obj(setdiff(by$x, x_vars)) ) abort(msg) } y_vars <- tbl_vars(y) if (!all(by$y %in% y_vars)) { msg <- glue("`by` can't contain join column {missing} which is missing from RHS.", missing = fmt_obj(setdiff(by$y, y_vars)) ) abort(msg) } by } #' @export common_by.NULL <- function(by, x, y) { by <- intersect(tbl_vars(x), tbl_vars(y)) by <- by[!is.na(by)] if (length(by) == 0) { msg <- glue("`by` required, because the data sources have no common variables.") abort(msg) } inform(auto_by_msg(by)) list( x = by, y = by ) } auto_by_msg <- function(by) { by_quoted <- encodeString(by, quote = '"') if (length(by_quoted) == 1L) { by_code <- by_quoted } else { by_code <- paste0("c(", paste(by_quoted, collapse = ", "), ")") } paste0("Joining, by = ", by_code) } #' @export common_by.default <- function(by, x, y) { msg <- glue("`by` must be a (named) character vector, list, or NULL for natural joins (not recommended in production code), 
not {obj_type_friendly(by)}.") abort(msg) } dplyr/R/pick.R0000644000176200001440000001550614406402754012673 0ustar liggesusers#' Select a subset of columns #' #' @description #' `pick()` provides a way to easily select a subset of columns from your data #' using [select()] semantics while inside a #' ["data-masking"][rlang::args_data_masking] function like [mutate()] or #' [summarise()]. `pick()` returns a data frame containing the selected columns #' for the current group. #' #' `pick()` is complementary to [across()]: #' - With `pick()`, you typically apply a function to the full data frame. #' - With `across()`, you typically apply a function to each column. #' #' @details #' Theoretically, `pick()` is intended to be replaceable with an equivalent call #' to `tibble()`. For example, `pick(a, c)` could be replaced with #' `tibble(a = a, c = c)`, and `pick(everything())` on a data frame with cols #' `a`, `b`, and `c` could be replaced with `tibble(a = a, b = b, c = c)`. #' `pick()` specially handles the case of an empty selection by returning a 1 #' row, 0 column tibble, so an exact replacement is more like: #' #' ``` #' size <- vctrs::vec_size_common(..., .absent = 1L) #' out <- vctrs::vec_recycle_common(..., .size = size) #' tibble::new_tibble(out, nrow = size) #' ``` #' #' @param ... <[`tidy-select`][dplyr_tidy_select]> #' #' Columns to pick. #' #' You can't pick grouping columns because they are already automatically #' handled by the verb (i.e. [summarise()] or [mutate()]). #' #' @returns #' A tibble containing the selected columns for the current group. #' #' @seealso [across()] #' @export #' @examples #' df <- tibble( #' x = c(3, 2, 2, 2, 1), #' y = c(0, 2, 1, 1, 4), #' z1 = c("a", "a", "a", "b", "a"), #' z2 = c("c", "d", "d", "a", "c") #' ) #' df #' #' # `pick()` provides a way to select a subset of your columns using #' # tidyselect. It returns a data frame. 
#' df %>% mutate(cols = pick(x, y)) #' #' # This is useful for functions that take data frames as inputs. #' # For example, you can compute a joint rank between `x` and `y`. #' df %>% mutate(rank = dense_rank(pick(x, y))) #' #' # `pick()` is also useful as a bridge between data-masking functions (like #' # `mutate()` or `group_by()`) and functions with tidy-select behavior (like #' # `select()`). For example, you can use `pick()` to create a wrapper around #' # `group_by()` that takes a tidy-selection of columns to group on. For more #' # bridge patterns, see #' # https://rlang.r-lib.org/reference/topic-data-mask-programming.html#bridge-patterns. #' my_group_by <- function(data, cols) { #' group_by(data, pick({{ cols }})) #' } #' #' df %>% my_group_by(c(x, starts_with("z"))) #' #' # Or you can use it to dynamically select columns to `count()` by #' df %>% count(pick(starts_with("z"))) pick <- function(...) { # This is the evaluation fallback for `pick()`, which runs: # - When users call `pick()` outside of a mutate-like context. # - When users wrap `pick()` into their own helper functions, preventing # `pick()` expansion from occurring. mask <- peek_mask() if (dots_n(...) == 0L) { stop_pick_empty() } # Evaluates `pick()` on current columns. 
# Mimicking expansion as much as possible, which should match the idea of # replacing the `pick()` call directly with `tibble()`, like: # pick(a, b, starts_with("foo")) -> tibble(a = a, b = b, foo1 = foo1) non_group_vars <- mask$current_non_group_vars() data <- mask$current_cols(non_group_vars) # `pick()` is evaluated in a data mask so we need to remove the # mask layer from the quosure environments (same as `across()`) (#5460) quos <- enquos(..., .named = NULL) quos <- map(quos, quo_set_env_to_data_mask_top) expr <- expr(c(!!!quos)) sel <- tidyselect::eval_select( expr = expr, data = data, allow_rename = FALSE ) data <- data[sel] data <- dplyr_pick_tibble(!!!data) data } # ------------------------------------------------------------------------------ expand_pick <- function(quo, mask) { error_call <- call("pick") out <- expand_pick_quo(quo, mask, error_call = error_call) out <- new_dplyr_quosure(out, !!!attr(quo, "dplyr:::data")) out } expand_pick_quo <- function(quo, mask, error_call = caller_env()) { env <- quo_get_env(quo) expr <- quo_get_expr(quo) if (is_missing(expr)) { return(quo) } if (is_quosure(expr)) { expr <- expand_pick_quo(expr, mask, error_call = error_call) } else if (is_call(expr)) { expr <- expand_pick_call(expr, env, mask, error_call = error_call) } new_quosure(expr, env = env) } expand_pick_call <- function(expr, env, mask, error_call = caller_env()) { if (is_call(expr, name = "pick", ns = c("", "dplyr"))) { expr <- as_pick_selection(expr, error_call) out <- eval_pick(expr, env, mask, error_call) out <- as_pick_expansion(out) return(out) } if (is_call(expr, name = c("~", "function"))) { # Never expand across anonymous function boundaries return(expr) } index <- seq2(2L, length(expr)) for (i in index) { elt <- expr[[i]] if (is_missing(elt)) { next } if (is_quosure(elt)) { expr[[i]] <- expand_pick_quo(elt, mask, error_call = error_call) } else if (is_call(elt)) { expr[[i]] <- expand_pick_call(elt, env, mask, error_call = error_call) } } expr } 
# Resolve a `pick()` tidy-selection against the mask's current columns and
# return the selected column names (character vector).
#
# @param expr A `c(...)` selection call built by `as_pick_selection()`.
# @param env Environment in which the selection is evaluated.
# @param mask The `DataMask` providing the current columns.
# @param error_call Call used for tidyselect error reporting.
eval_pick <- function(expr, env, mask, error_call = caller_env()) {
  # Evaluates `pick()` on the full version of the "current" columns.

  # Remove grouping variables, which are never allowed to be selected as
  # variables to `pick()`. This includes variables specified in
  # `rowwise(.data, ...)`.
  data <- mask$get_current_data(groups = FALSE)

  out <- tidyselect::eval_select(
    expr = expr,
    env = env,
    data = data,
    error_call = error_call,
    allow_rename = FALSE
  )

  names(out)
}

# Convert a captured `pick(...)` call into a single `c(...)` expression
# suitable for `tidyselect::eval_select()`. Errors if `pick()` was called
# with no arguments.
as_pick_selection <- function(expr, error_call) {
  # Drop `pick()`, get the arguments
  expr <- expr[-1]

  # A zero-argument call leaves nothing behind, which `[-1]` yields as NULL
  if (is.null(expr)) {
    stop_pick_empty(call = error_call)
  }

  # Turn arguments into list of expressions
  expr <- as.list(expr)

  # Inline into `c()` call for tidy-selection
  expr <- expr(c(!!!expr))

  expr
}

# Build the expression that a `pick()` call expands to: a call to
# `dplyr_pick_tibble()` with one named symbol per selected column. The
# namespaced `asNamespace("dplyr")$` access keeps the expansion working even
# when evaluated in environments where dplyr internals are not attached.
as_pick_expansion <- function(names) {
  out <- set_names(syms(names), names)
  expr(asNamespace("dplyr")$dplyr_pick_tibble(!!!out))
}

# Runtime counterpart of the `pick()` expansion: assemble the selected
# columns into a tibble, recycling them to a common size. Errors are
# reported as coming from `pick()`.
dplyr_pick_tibble <- function(...) {
  error_call <- call("pick")

  out <- list2(...)

  # Allow recycling between selected columns, in case it is called from
  # a `reframe()` call that modified columns in an earlier expression like
  # `reframe(df, x = 1, y = pick(x, z))`. This also closely mimics expansion
  # into `y = tibble(x, z)`, with an empty selection being an exception that
  # is like `y = tibble(.rows = 1L)` for recycling purposes (#6685).
  size <- vec_size_common(!!!out, .absent = 1L, .call = error_call)
  out <- vec_recycle_common(!!!out, .size = size, .call = error_call)

  dplyr_new_tibble(out, size = size)
}

# Shared error for an empty `pick()` selection.
stop_pick_empty <- function(call = caller_env()) {
  abort("Must supply at least one input to `pick()`.", call = call)
}
dplyr/R/sample.R0000644000176200001440000001150214472225345013220 0ustar liggesusers#' Sample n rows from a table
#'
#' @description
#' `r lifecycle::badge("superseded")`
#' `sample_n()` and `sample_frac()` have been superseded in favour of
#' [slice_sample()]. 
While they will not be deprecated in the near future, #' retirement means that we will only perform critical bug fixes, so we recommend #' moving to the newer alternative. #' #' These functions were superseded because we realised it was more convenient to #' have two mutually exclusive arguments to one function, rather than two #' separate functions. This also made it to clean up a few other smaller #' design issues with `sample_n()`/`sample_frac`: #' #' * The connection to `slice()` was not obvious. #' * The name of the first argument, `tbl`, is inconsistent with other #' single table verbs which use `.data`. #' * The `size` argument uses tidy evaluation, which is surprising and #' undocumented. #' * It was easier to remove the deprecated `.env` argument. #' * `...` was in a suboptimal position. #' #' @keywords internal #' @param tbl A data.frame. #' @param size <[`tidy-select`][dplyr_tidy_select]> #' For `sample_n()`, the number of rows to select. #' For `sample_frac()`, the fraction of rows to select. #' If `tbl` is grouped, `size` applies to each group. #' @param replace Sample with or without replacement? #' @param weight <[`tidy-select`][dplyr_tidy_select]> Sampling weights. #' This must evaluate to a vector of non-negative numbers the same length as #' the input. Weights are automatically standardised to sum to 1. #' @param .env DEPRECATED. #' @param ... 
ignored #' @examples #' df <- tibble(x = 1:5, w = c(0.1, 0.1, 0.1, 2, 2)) #' #' # sample_n() -> slice_sample() ---------------------------------------------- #' # Was: #' sample_n(df, 3) #' sample_n(df, 10, replace = TRUE) #' sample_n(df, 3, weight = w) #' #' # Now: #' slice_sample(df, n = 3) #' slice_sample(df, n = 10, replace = TRUE) #' slice_sample(df, n = 3, weight_by = w) #' #' # Note that sample_n() would error if n was bigger than the group size #' # slice_sample() will just use the available rows for consistency with #' # the other slice helpers like slice_head() #' try(sample_n(df, 10)) #' slice_sample(df, n = 10) #' #' # sample_frac() -> slice_sample() ------------------------------------------- #' # Was: #' sample_frac(df, 0.25) #' sample_frac(df, 2, replace = TRUE) #' #' # Now: #' slice_sample(df, prop = 0.25) #' slice_sample(df, prop = 2, replace = TRUE) #' @export sample_n <- function(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) { lifecycle::signal_stage("superseded", "sample_n()") UseMethod("sample_n") } #' @export sample_n.default <- function(tbl, size, replace = FALSE, weight = NULL, .env = parent.frame(), ...) { msg <- glue("`tbl` must be a data frame, not {obj_type_friendly(tbl)}.") abort(msg) } #' @export sample_n.data.frame <- function(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) { if (!is_null(.env)) { inform("`sample_n()` argument `.env` is deprecated and no longer has any effect.") } size <- enquo(size) weight <- enquo(weight) dplyr_local_error_call() slice(tbl, local({ size <- check_size(!!size, n(), replace = replace) sample.int(n(), size, replace = replace, prob = !!weight) })) } #' @rdname sample_n #' @export sample_frac <- function(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) { lifecycle::signal_stage("superseded", "sample_frac()") UseMethod("sample_frac") } #' @export sample_frac.default <- function(tbl, size = 1, replace = FALSE, weight = NULL, .env = parent.frame(), ...) 
{ msg <- glue("`tbl` must be a data frame, not {obj_type_friendly(tbl)}.") abort(msg) } #' @export sample_frac.data.frame <- function(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) { if (!is_null(.env)) { inform("`.env` is deprecated and no longer has any effect") } size <- enquo(size) weight <- enquo(weight) dplyr_local_error_call() slice(tbl, local({ size <- round(n() * check_frac(!!size, replace = replace)) sample.int(n(), size, replace = replace, prob = !!weight) })) } # Helper functions ------------------------------------------------------------- check_size <- function(size, n, replace = FALSE) { if (size <= n || replace) return(invisible(size)) bullets <- c( glue("`size` must be less than or equal to {n} (size of data)."), i = "set `replace = TRUE` to use sampling with replacement." ) abort(bullets, call = NULL) } check_frac <- function(size, replace = FALSE) { if (size <= 1 || replace) return(invisible(size)) bullets <- c( glue("`size` of sampled fraction must be less or equal to one."), i = "set `replace = TRUE` to use sampling with replacement." ) abort(bullets, call = NULL) } dplyr/R/relocate.R0000644000176200001440000001177114366556340013551 0ustar liggesusers#' Change column order #' #' Use `relocate()` to change column positions, using the same syntax as #' `select()` to make it easy to move blocks of columns at once. #' #' @inheritParams arrange #' @param ... <[`tidy-select`][dplyr_tidy_select]> Columns to move. #' @param .before,.after <[`tidy-select`][dplyr_tidy_select]> Destination of #' columns selected by `...`. Supplying neither will move columns to the #' left-hand side; specifying both is an error. #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Rows are not affected. #' * The same columns appear in the output, but (usually) in a different place #' and possibly renamed. #' * Data frame attributes are preserved. #' * Groups are not affected. 
#' @section Methods:
#' This function is a **generic**, which means that packages can provide
#' implementations (methods) for other classes. See the documentation of
#' individual methods for extra arguments and differences in behaviour.
#'
#' The following methods are currently available in loaded packages:
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("relocate")}.
#' @export
#' @examples
#' df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a")
#' df %>% relocate(f)
#' df %>% relocate(a, .after = c)
#' df %>% relocate(f, .before = b)
#' df %>% relocate(a, .after = last_col())
#'
#' # relocated columns can change name
#' df %>% relocate(ff = f)
#'
#' # Can also select variables based on their type
#' df %>% relocate(where(is.character))
#' df %>% relocate(where(is.numeric), .after = last_col())
#' # Or with any other select helper
#' df %>% relocate(any_of(c("a", "e", "i", "o", "u")))
#'
#' # When .before or .after refers to multiple variables they will be
#' # moved to be immediately before/after the selected variables.
#' df2 <- tibble(a = 1, b = "a", c = 1, d = "a")
#' df2 %>% relocate(where(is.numeric), .after = where(is.character))
#' df2 %>% relocate(where(is.numeric), .before = where(is.character))
relocate <- function(.data, ..., .before = NULL, .after = NULL) {
  UseMethod("relocate")
}

#' @export
relocate.data.frame <- function(.data, ..., .before = NULL, .after = NULL) {
  # Resolve the requested move into a full column permutation (`loc`), then
  # select in that order and apply any renames carried in `names(loc)`.
  loc <- eval_relocate(
    expr = expr(c(...)),
    data = .data,
    before = enquo(.before),
    after = enquo(.after),
    before_arg = ".before",
    after_arg = ".after"
  )
  out <- dplyr_col_select(.data, loc)
  out <- set_names(out, names(loc))
  out
}

eval_relocate <- function(expr,
                          data,
                          ...,
                          before = NULL,
                          after = NULL,
                          before_arg = "before",
                          after_arg = "after",
                          env = caller_env(),
                          error_call = caller_env()) {
  # `eval_relocate()` returns a named integer vector of size `ncol(data)`
  # describing how to rearrange `data`. Each location in the range
  # `seq2(1L, ncol(data))` is represented once. The names are the new names to
  # assign to those columns. They are typically the same as the original names,
  # but `expr` does allow for renaming.
  check_dots_empty0(...)

  # Tidy-select the columns to move (may rename: names(sel) are new names).
  sel <- tidyselect::eval_select(
    expr = expr,
    data = data,
    env = env,
    error_call = error_call
  )

  # Enforce the invariant that relocating can't change the number of columns by
  # retaining only the last instance of a column that is renamed multiple times
  # TODO: https://github.com/r-lib/vctrs/issues/1442
  # `sel <- vec_unique(sel, which = "last")`
  loc_last <- which(!duplicated(sel, fromLast = TRUE))
  sel <- vec_slice(sel, loc_last)

  n <- length(data)

  before <- as_quosure(before, env = env)
  after <- as_quosure(after, env = env)

  has_before <- !quo_is_null(before)
  has_after <- !quo_is_null(after)

  if (has_before && has_after) {
    message <- glue("Can't supply both `{before_arg}` and `{after_arg}`.")
    abort(message, call = error_call)
  }

  # `where` becomes the insertion point: columns in `sel` are placed starting
  # at position `where`.
  if (has_before) {
    # TODO: Use `allow_rename = FALSE`. https://github.com/r-lib/tidyselect/issues/221
    where <- tidyselect::eval_select(before, data, env = env, error_call = error_call)
    where <- unname(where)

    if (length(where) == 0L) {
      # Empty `before` selection pushes `sel` to the front
      where <- 1L
    } else {
      where <- min(where)
    }
  } else if (has_after) {
    # TODO: Use `allow_rename = FALSE`. https://github.com/r-lib/tidyselect/issues/221
    where <- tidyselect::eval_select(after, data, env = env, error_call = error_call)
    where <- unname(where)

    if (length(where) == 0L) {
      # Empty `after` selection pushes `sel` to the back
      where <- n
    } else {
      where <- max(where)
    }

    where <- where + 1L
  } else {
    # Defaults to `before = everything()` if neither `before` nor `after` are supplied
    where <- 1L
  }

  # Split the untouched columns around the insertion point, dropping the ones
  # being relocated so each column location appears exactly once.
  lhs <- seq2(1L, where - 1L)
  rhs <- seq2(where, n)

  lhs <- setdiff(lhs, sel)
  rhs <- setdiff(rhs, sel)

  # Untouched columns keep their original names; `sel` keeps any new names.
  names <- names(data)
  names(lhs) <- names[lhs]
  names(rhs) <- names[rhs]

  sel <- vec_c(lhs, sel, rhs)

  sel
}
dplyr/R/join-by.R0000644000176200001440000007727214525503021013313 0ustar liggesusers#' Join specifications
#'
#' `join_by()` constructs a specification that describes how to join two tables
#' using a small domain specific language. The result can be supplied as the
#' `by` argument to any of the join functions (such as [left_join()]).
#'
#' # Join types
#'
#' The following types of joins are supported by dplyr:
#' - Equality joins
#' - Inequality joins
#' - Rolling joins
#' - Overlap joins
#' - Cross joins
#'
#' Equality, inequality, rolling, and overlap joins are discussed in more detail
#' below. Cross joins are implemented through [cross_join()].
#'
#' ## Equality joins
#'
#' Equality joins require keys to be equal between one or more pairs of columns,
#' and are the most common type of join. To construct an equality join using
#' `join_by()`, supply two column names to join with separated by `==`.
#' Alternatively, supplying a single name will be interpreted as an equality
#' join between two columns of the same name. For example, `join_by(x)` is
#' equivalent to `join_by(x == x)`.
#'
#' ## Inequality joins
#'
#' Inequality joins match on an inequality, such as `>`, `>=`, `<`, or `<=`, and
#' are common in time series analysis and genomics.
To construct an inequality #' join using `join_by()`, supply two column names separated by one of the above #' mentioned inequalities. #' #' Note that inequality joins will match a single row in `x` to a potentially #' large number of rows in `y`. Be extra careful when constructing inequality #' join specifications! #' #' ## Rolling joins #' #' Rolling joins are a variant of inequality joins that limit the results #' returned from an inequality join condition. They are useful for "rolling" the #' closest match forward/backwards when there isn't an exact match. To construct #' a rolling join, wrap an inequality with `closest()`. #' #' - `closest(expr)` #' #' `expr` must be an inequality involving one of: `>`, `>=`, `<`, or `<=`. #' #' For example, `closest(x >= y)` is interpreted as: For each value in `x`, #' find the closest value in `y` that is less than or equal to that `x` value. #' #' `closest()` will always use the left-hand table (`x`) as the primary table, #' and the right-hand table (`y`) as the one to find the closest match in, #' regardless of how the inequality is specified. For example, #' `closest(y$a >= x$b)` will always be interpreted as `closest(x$b <= y$a)`. #' #' ## Overlap joins #' #' Overlap joins are a special case of inequality joins involving one or two #' columns from the left-hand table _overlapping_ a range defined by two columns #' from the right-hand table. There are three helpers that `join_by()` #' recognizes to assist with constructing overlap joins, all of which can be #' constructed from simpler inequalities. #' #' - `between(x, y_lower, y_upper, ..., bounds = "[]")` #' #' For each value in `x`, this finds everywhere that value falls between #' `[y_lower, y_upper]`. Equivalent to `x >= y_lower, x <= y_upper` by #' default. #' #' `bounds` can be one of \code{"[]"}, \code{"[)"}, \code{"(]"}, or #' \code{"()"} to alter the inclusiveness of the lower and upper bounds. 
This #' changes whether `>=` or `>` and `<=` or `<` are used to build the #' inequalities shown above. #' #' Dots are for future extensions and must be empty. #' #' - `within(x_lower, x_upper, y_lower, y_upper)` #' #' For each range in `[x_lower, x_upper]`, this finds everywhere that range #' falls completely within `[y_lower, y_upper]`. Equivalent to `x_lower >= #' y_lower, x_upper <= y_upper`. #' #' The inequalities used to build `within()` are the same regardless of the #' inclusiveness of the supplied ranges. #' #' - `overlaps(x_lower, x_upper, y_lower, y_upper, ..., bounds = "[]")` #' #' For each range in `[x_lower, x_upper]`, this finds everywhere that range #' overlaps `[y_lower, y_upper]` in any capacity. Equivalent to `x_lower <= #' y_upper, x_upper >= y_lower` by default. #' #' `bounds` can be one of \code{"[]"}, \code{"[)"}, \code{"(]"}, or #' \code{"()"} to alter the inclusiveness of the lower and upper bounds. #' \code{"[]"} uses `<=` and `>=`, but the 3 other options use `<` and `>` #' and generate the exact same inequalities. #' #' Dots are for future extensions and must be empty. #' #' These conditions assume that the ranges are well-formed and non-empty, i.e. #' `x_lower <= x_upper` when bounds are treated as \code{"[]"}, and #' `x_lower < x_upper` otherwise. #' #' # Column referencing #' #' When specifying join conditions, `join_by()` assumes that column names on the #' left-hand side of the condition refer to the left-hand table (`x`), and names #' on the right-hand side of the condition refer to the right-hand table (`y`). #' Occasionally, it is clearer to be able to specify a right-hand table name on #' the left-hand side of the condition, and vice versa. To support this, column #' names can be prefixed by `x$` or `y$` to explicitly specify which table they #' come from. #' #' @param ... Expressions specifying the join. 
#' #' Each expression should consist of one of the following: #' #' - Equality condition: `==` #' - Inequality conditions: `>=`, `>`, `<=`, or `<` #' - Rolling helper: `closest()` #' - Overlap helpers: `between()`, `within()`, or `overlaps()` #' #' Other expressions are not supported. If you need to perform a join on #' a computed variable, e.g. `join_by(sales_date - 40 >= promo_date)`, #' you'll need to precompute and store it in a separate column. #' #' Column names should be specified as quoted or unquoted names. By default, #' the name on the left-hand side of a join condition refers to the left-hand #' table, unless overridden by explicitly prefixing the column name with #' either `x$` or `y$`. #' #' If a single column name is provided without any join conditions, it is #' interpreted as if that column name was duplicated on each side of `==`, #' i.e. `x` is interpreted as `x == x`. #' #' @aliases closest overlaps within #' #' @export #' @examples #' sales <- tibble( #' id = c(1L, 1L, 1L, 2L, 2L), #' sale_date = as.Date(c("2018-12-31", "2019-01-02", "2019-01-05", "2019-01-04", "2019-01-01")) #' ) #' sales #' #' promos <- tibble( #' id = c(1L, 1L, 2L), #' promo_date = as.Date(c("2019-01-01", "2019-01-05", "2019-01-02")) #' ) #' promos #' #' # Match `id` to `id`, and `sale_date` to `promo_date` #' by <- join_by(id, sale_date == promo_date) #' left_join(sales, promos, by) #' #' # For each `sale_date` within a particular `id`, #' # find all `promo_date`s that occurred before that particular sale #' by <- join_by(id, sale_date >= promo_date) #' left_join(sales, promos, by) #' #' # For each `sale_date` within a particular `id`, #' # find only the closest `promo_date` that occurred before that sale #' by <- join_by(id, closest(sale_date >= promo_date)) #' left_join(sales, promos, by) #' #' # If you want to disallow exact matching in rolling joins, use `>` rather #' # than `>=`. 
Note that the promo on `2019-01-05` is no longer considered the #' # closest match for the sale on the same date. #' by <- join_by(id, closest(sale_date > promo_date)) #' left_join(sales, promos, by) #' #' # Same as before, but also require that the promo had to occur at most 1 #' # day before the sale was made. We'll use a full join to see that id 2's #' # promo on `2019-01-02` is no longer matched to the sale on `2019-01-04`. #' sales <- mutate(sales, sale_date_lower = sale_date - 1) #' by <- join_by(id, closest(sale_date >= promo_date), sale_date_lower <= promo_date) #' full_join(sales, promos, by) #' #' # --------------------------------------------------------------------------- #' #' segments <- tibble( #' segment_id = 1:4, #' chromosome = c("chr1", "chr2", "chr2", "chr1"), #' start = c(140, 210, 380, 230), #' end = c(150, 240, 415, 280) #' ) #' segments #' #' reference <- tibble( #' reference_id = 1:4, #' chromosome = c("chr1", "chr1", "chr2", "chr2"), #' start = c(100, 200, 300, 415), #' end = c(150, 250, 399, 450) #' ) #' reference #' #' # Find every time a segment `start` falls between the reference #' # `[start, end]` range. #' by <- join_by(chromosome, between(start, start, end)) #' full_join(segments, reference, by) #' #' # If you wanted the reference columns first, supply `reference` as `x` #' # and `segments` as `y`, then explicitly refer to their columns using `x$` #' # and `y$`. #' by <- join_by(chromosome, between(y$start, x$start, x$end)) #' full_join(reference, segments, by) #' #' # Find every time a segment falls completely within a reference. #' # Sometimes using `x$` and `y$` makes your intentions clearer, even if they #' # match the default behavior. #' by <- join_by(chromosome, within(x$start, x$end, y$start, y$end)) #' inner_join(segments, reference, by) #' #' # Find every time a segment overlaps a reference in any way. 
#' by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end)) #' full_join(segments, reference, by) #' #' # It is common to have right-open ranges with bounds like `[)`, which would #' # mean an end value of `415` would no longer overlap a start value of `415`. #' # Setting `bounds` allows you to compute overlaps with those kinds of ranges. #' by <- join_by(chromosome, overlaps(x$start, x$end, y$start, y$end, bounds = "[)")) #' full_join(segments, reference, by) join_by <- function(...) { # `join_by()` works off pure expressions with no evaluation in the user's # environment, but we want to allow `{{ }}` to make it easier to program with. # The best way to do this is to capture quosures with `enquos()`, and then # immediately squash them recursively into expressions with `quo_squash()`. exprs <- enquos(..., .named = NULL) exprs <- map(exprs, quo_squash) n <- length(exprs) if (n == 0L) { abort(c( "Must supply at least one expression.", i = "If you want a cross join, use `cross_join()`." )) } if (!is_null(names(exprs))) { abort(c( "Can't name join expressions.", i = "Did you use `=` instead of `==`?" )) } error_call <- environment() bys <- vector("list", length = n) for (i in seq_len(n)) { bys[[i]] <- parse_join_by_expr(exprs[[i]], i, error_call = error_call) } # `between()`, `overlaps()`, and `within()` parse into >1 binary conditions x <- flat_map_chr(bys, function(by) by$x) y <- flat_map_chr(bys, function(by) by$y) filter <- flat_map_chr(bys, function(by) by$filter) condition <- flat_map_chr(bys, function(by) by$condition) new_join_by( exprs = exprs, condition = condition, filter = filter, x = x, y = y ) } #' @export print.dplyr_join_by <- function(x, ...) 
{
  out <- map_chr(x$exprs, expr_deparse)
  out <- glue_collapse(glue("- {out}"), sep = "\n")
  cat("Join By:\n")
  cat(out)
  invisible(x)
}

# Low-level constructor for a join specification. All five fields are parallel:
# element i of `condition`/`filter`/`x`/`y` describes one binary join condition.
new_join_by <- function(exprs = list(),
                        condition = character(),
                        filter = character(),
                        x = character(),
                        y = character()) {
  out <- list(
    exprs = exprs,
    condition = condition,
    filter = filter,
    x = x,
    y = y
  )
  structure(out, class = "dplyr_join_by")
}

# Map `fn` over `x` and flatten the character results into one vector.
flat_map_chr <- function(x, fn) {
  list_unchop(map(x, fn), ptype = character())
}

# ------------------------------------------------------------------------------

# Internal generic: normalize the various accepted `by` inputs (character
# vector, list, `join_by()` result) into a `dplyr_join_by` object.
as_join_by <- function(x, error_call = caller_env()) {
  UseMethod("as_join_by")
}

#' @export
as_join_by.default <- function(x, error_call = caller_env()) {
  message <- glue(paste0(
    "`by` must be a (named) character vector, list, `join_by()` result, ",
    "or NULL, not {obj_type_friendly(x)}."
  ))
  abort(message, call = error_call)
}

#' @export
as_join_by.dplyr_join_by <- function(x, error_call = caller_env()) {
  # Already a join specification; pass through unchanged.
  x
}

#' @export
as_join_by.character <- function(x, error_call = caller_env()) {
  # Named elements map x-table names to y-table names, e.g. c(a = "b").
  x_names <- names(x) %||% x
  y_names <- unname(x)

  # If x partially named, assume unnamed are the same in both tables
  x_names[x_names == ""] <- y_names[x_names == ""]

  finalise_equi_join_by(x_names, y_names)
}

#' @export
as_join_by.list <- function(x, error_call = caller_env()) {
  # TODO: check lengths
  x_names <- x[["x"]]
  y_names <- x[["y"]]

  if (!is_character(x_names)) {
    abort("`by$x` must evaluate to a character vector.")
  }
  if (!is_character(y_names)) {
    abort("`by$y` must evaluate to a character vector.")
  }

  finalise_equi_join_by(x_names, y_names)
}

# Build a pure equality join spec from parallel x/y name vectors; every
# condition is `==` with no rolling filter.
finalise_equi_join_by <- function(x_names, y_names) {
  n <- length(x_names)

  if (n == 0L) {
    abort(
      "Backwards compatible support for cross joins should have been caught earlier.",
      .internal = TRUE
    )
  }

  exprs <- map2(x_names, y_names, function(x, y) expr(!!x == !!y))
  condition <- vec_rep("==", times = n)
  filter <- vec_rep("none", times = n)

  new_join_by(
    exprs = exprs,
    condition = condition,
    filter = filter,
    x = x_names,
    y = y_names
  )
}

# ------------------------------------------------------------------------------

# Natural-join fallback: when `by` is NULL, join on all common column names
# and tell the user which spec was inferred.
join_by_common <- function(x_names,
                           y_names,
                           ...,
                           error_call = caller_env()) {
  check_dots_empty0(...)

  by <- intersect(x_names, y_names)

  if (length(by) == 0) {
    message <- c(
      "`by` must be supplied when `x` and `y` have no common variables.",
      i = "Use `cross_join()` to perform a cross-join."
    )
    abort(message, call = error_call)
  }

  by_names <- tick_if_needed(by)
  by_names <- glue_collapse(by_names, sep = ", ")
  inform(glue("Joining with `by = join_by({by_names})`"))

  finalise_equi_join_by(by, by)
}

# ------------------------------------------------------------------------------

# In the parsing implementation below, note that all `binding_*()` functions
# should maintain a function signature that exactly matches what is documented
# in `?join_by`, as these get bound directly to their corresponding function
# name, i.e. `binding_join_by_between()` is bound to `between()`. This is why
# these functions don't have an `error_call` argument.
# Parse the `i`-th user expression supplied to `join_by()` into a list with
# fields `x`, `y`, `condition`, and `filter` describing one or more binary
# join conditions. Dispatches on the top-level call of the expression.
parse_join_by_expr <- function(expr, i, error_call) {
  if (is_missing(expr)) {
    message <- c(
      "Expressions can't be missing.",
      x = glue("Expression {i} is missing.")
    )
    abort(message, call = error_call)
  }

  if (length(expr) == 0L) {
    message <- c(
      "Expressions can't be empty.",
      x = glue("Expression {i} is empty.")
    )
    abort(message, call = error_call)
  }

  if (is_symbol_or_string(expr)) {
    # A bare name `x` is shorthand for the equality condition `x == x`.
    expr <- expr(!!expr == !!expr)
  }

  if (!is_call(expr)) {
    message <- c(
      "Each element of `...` must be a single column name or a join by expression.",
      x = glue("Element {i} is not a name and not an expression.")
    )
    abort(message, call = error_call)
  }

  if (is_call(expr, ns = "dplyr")) {
    # Normalize by removing the `dplyr::`
    expr[[1]] <- sym(call_name(expr))
  }

  op <- expr[[1]]

  if (!is_symbol(op)) {
    # Non-symbol call head: either a foreign-namespace call or something
    # entirely unsupported.
    if (is_call(op, name = "::")) {
      stop_invalid_namespaced_expression(expr, i, error_call)
    } else {
      stop_invalid_top_expression(expr, i, error_call)
    }
  }

  op <- as_string(op)

  switch(
    op,
    "==" = ,
    ">=" = ,
    ">" = ,
    "<=" = ,
    "<" = parse_join_by_binary(expr, i, error_call),
    "between" = parse_join_by_between(expr, i, error_call),
    "within" = parse_join_by_within(expr, i, error_call),
    "overlaps" = parse_join_by_overlaps(expr, i, error_call),
    "closest" = parse_join_by_closest(expr, i, error_call),
    "$" = stop_invalid_dollar_sign(expr, i, error_call),
    stop_invalid_top_expression(expr, i, error_call)
  )
}

# Error: a top-level `x$col` alone is not a valid join expression.
stop_invalid_dollar_sign <- function(expr, i, call) {
  message <- c(
    "Can't use `$` when specifying a single column name.",
    i = glue("Expression {i} is {err_expr(expr)}.")
  )
  abort(message, call = call)
}

# Error: top-level operator/helper is not one of the supported set.
stop_invalid_top_expression <- function(expr, i, call) {
  options <- c("==", ">=", ">", "<=", "<", "closest()", "between()", "overlaps()", "within()")
  options <- glue::backtick(options)
  options <- glue_collapse(options, sep = ", ", last = ", or ")

  message <- c(
    glue("Expressions must use one of: {options}."),
    i = glue("Expression {i} is {err_expr(expr)}.")
  )
  abort(message, call = call)
}

stop_invalid_namespaced_expression <-
function(expr, i, call) { message <- c( glue("Expressions can only be namespace prefixed with `dplyr::`."), i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = call) } parse_join_by_name <- function(expr, i, default_side, error_call) { if (is_symbol_or_string(expr)) { name <- as_string(expr) return(list(name = name, side = default_side)) } if (is_call(expr, name = "$")) { return(parse_join_by_dollar(expr, i, error_call)) } message <- c( paste0( "Expressions can't contain computed columns, ", "and can only reference columns by name or by explicitly specifying ", "a side, like `x$col` or `y$col`." ), i = glue("Expression {i} contains {err_expr(expr)}.") ) abort(message, call = error_call) } parse_join_by_dollar <- function(expr, i, error_call) { args <- eval_join_by_dollar(expr, error_call) side <- args$side if (!is_symbol_or_string(side)) { message <- c( "The left-hand side of a `$` expression must be a symbol or string.", i = glue("Expression {i} contains {err_expr(expr)}.") ) abort(message, call = error_call) } side <- as_string(side) sides <- c("x", "y") if (!side %in% sides) { message <- c( "The left-hand side of a `$` expression must be either `x$` or `y$`.", i = glue("Expression {i} contains {err_expr(expr)}.") ) abort(message, call = error_call) } name <- args$name if (!is_symbol_or_string(name)) { message <- c( "The right-hand side of a `$` expression must be a symbol or string.", i = glue("Expression {i} contains {err_expr(expr)}.") ) abort(message, call = error_call) } name <- as_string(name) list(name = name, side = side) } eval_join_by_dollar <- function(expr, error_call) { env <- new_environment() local_error_call(error_call, frame = env) env_poke(env, "$", binding_join_by_dollar) eval_tidy(expr, env = env) } binding_join_by_dollar <- function(x, name) { error_call <- caller_env() x <- enexpr(x) name <- enexpr(name) check_missing_arg(x, "x", "$", error_call, binary_op = TRUE) check_missing_arg(name, "name", "$", error_call, binary_op 
= TRUE) list(side = x, name = name) } parse_join_by_binary <- function(expr, i, error_call) { args <- eval_join_by_binary(expr, error_call) condition <- args$condition lhs <- args$lhs rhs <- args$rhs lhs <- parse_join_by_name(lhs, i, default_side = "x", error_call = error_call) rhs <- parse_join_by_name(rhs, i, default_side = "y", error_call = error_call) if (lhs$side == rhs$side) { message <- c( "The left and right-hand sides of a binary expression must reference different tables.", i = glue("Expression {i} contains {err_expr(expr)}.") ) abort(message, call = error_call) } if (lhs$side == "x") { x <- lhs$name y <- rhs$name } else { # Must reverse the op lookup <- c("==" = "==", ">=" = "<=", ">" = "<", "<=" = ">=", "<" = ">") condition <- lookup[[condition]] x <- rhs$name y <- lhs$name } list( x = x, y = y, condition = condition, filter = "none" ) } eval_join_by_binary <- function(expr, error_call) { env <- new_environment() local_error_call(error_call, frame = env) env_bind( env, `==` = binding_join_by_equality, `>` = binding_join_by_greater_than, `>=` = binding_join_by_greater_than_or_equal, `<` = binding_join_by_less_than, `<=` = binding_join_by_less_than_or_equal ) eval_tidy(expr, env = env) } binding_join_by_binary <- function(condition, error_call, x, y) { x <- enexpr(x) y <- enexpr(y) check_missing_arg(x, "x", condition, error_call, binary_op = TRUE) check_missing_arg(y, "y", condition, error_call, binary_op = TRUE) list(condition = condition, lhs = x, rhs = y) } binding_join_by_equality <- function(x, y) { binding_join_by_binary("==", caller_env(), !!enexpr(x), !!enexpr(y)) } binding_join_by_greater_than <- function(x, y) { binding_join_by_binary(">", caller_env(), !!enexpr(x), !!enexpr(y)) } binding_join_by_greater_than_or_equal <- function(x, y) { binding_join_by_binary(">=", caller_env(), !!enexpr(x), !!enexpr(y)) } binding_join_by_less_than <- function(x, y) { binding_join_by_binary("<", caller_env(), !!enexpr(x), !!enexpr(y)) } 
binding_join_by_less_than_or_equal <- function(x, y) { binding_join_by_binary("<=", caller_env(), !!enexpr(x), !!enexpr(y)) } parse_join_by_closest <- function(expr, i, error_call) { args <- eval_join_by_closest(expr, error_call) expr_binary <- args$expr if (!is_call(expr_binary)) { message <- c( "The first argument of `closest()` must be an expression.", i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } op <- as_string(expr_binary[[1]]) out <- switch( op, ">=" =, ">" =, "<=" =, "<" = parse_join_by_binary(expr_binary, i, error_call), "==" = stop_join_by_closest_equal_expression(expr, i, error_call), stop_join_by_closest_invalid_expression(expr, i, error_call) ) filter <- switch( out$condition, ">=" = "max", ">" = "max", "<=" = "min", "<" = "min", abort("Unexpected `closest()` `condition`.", .internal = TRUE) ) out$filter <- filter out } eval_join_by_closest <- function(expr, error_call) { env <- new_environment() local_error_call(error_call, frame = env) env_poke(env, "closest", binding_join_by_closest) eval_tidy(expr, env = env) } binding_join_by_closest <- function(expr) { error_call <- caller_env() expr <- enexpr(expr) check_missing_arg(expr, "expr", "closest", error_call) list(expr = expr) } stop_join_by_closest_equal_expression <- function(expr, i, error_call) { # `closest(x == y)` doesn't make any sense, # even if vctrs can technically handle it. 
message <- c( "The expression used in `closest()` can't use `==`.", i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } stop_join_by_closest_invalid_expression <- function(expr, i, error_call) { options <- c(">=", ">", "<=", "<") options <- glue::backtick(options) options <- glue_collapse(options, sep = ", ", last = ", or ") message <- c( glue("The expression used in `closest()` must use one of: {options}."), i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } parse_join_by_between <- function(expr, i, error_call) { args <- eval_join_by_between(expr, error_call) lhs <- parse_join_by_name(args$lhs, i, "x", error_call) rhs_lower <- parse_join_by_name(args$rhs_lower, i, "y", error_call) rhs_upper <- parse_join_by_name(args$rhs_upper, i, "y", error_call) bounds <- args$bounds if (rhs_lower$side != rhs_upper$side) { message <- c( "Expressions containing `between()` must reference the same table for the lower and upper bounds.", i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } if (lhs$side == rhs_lower$side) { message <- c( "Expressions containing `between()` can't all reference the same table.", i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } if (lhs$side == "x") { x <- c(lhs$name, lhs$name) y <- c(rhs_lower$name, rhs_upper$name) condition <- switch( bounds, "[]" = c(">=", "<="), "[)" = c(">=", "<"), "(]" = c(">", "<="), "()" = c(">", "<") ) } else { x <- c(rhs_lower$name, rhs_upper$name) y <- c(lhs$name, lhs$name) condition <- switch( bounds, "[]" = c("<=", ">="), "[)" = c("<=", ">"), "(]" = c("<", ">="), "()" = c("<", ">") ) } filter <- c("none", "none") list( x = x, y = y, condition = condition, filter = filter ) } eval_join_by_between <- function(expr, error_call) { env <- new_environment() local_error_call(error_call, frame = env) env_poke(env, "between", binding_join_by_between) eval_tidy(expr, env = env) } 
binding_join_by_between <- function(x, y_lower, y_upper, ..., bounds = "[]") { error_call <- caller_env() check_join_by_dots_empty(..., fn = "between", call = error_call) x <- enexpr(x) y_lower <- enexpr(y_lower) y_upper <- enexpr(y_upper) check_missing_arg(x, "x", "between", error_call) check_missing_arg(y_lower, "y_lower", "between", error_call) check_missing_arg(y_upper, "y_upper", "between", error_call) bounds <- check_bounds(bounds, call = error_call) list(lhs = x, rhs_lower = y_lower, rhs_upper = y_upper, bounds = bounds) } parse_join_by_within <- function(expr, i, error_call) { args <- eval_join_by_within(expr, error_call) lhs_lower <- parse_join_by_name(args$lhs_lower, i, "x", error_call) lhs_upper <- parse_join_by_name(args$lhs_upper, i, "x", error_call) rhs_lower <- parse_join_by_name(args$rhs_lower, i, "y", error_call) rhs_upper <- parse_join_by_name(args$rhs_upper, i, "y", error_call) if (lhs_lower$side != lhs_upper$side) { message <- c( paste0( "Expressions containing `within()` must reference ", "the same table for the left-hand side lower and upper bounds." ), i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } if (rhs_lower$side != rhs_upper$side) { message <- c( paste0( "Expressions containing `within()` must reference ", "the same table for the right-hand side lower and upper bounds." 
), i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } if (lhs_lower$side == rhs_lower$side) { message <- c( "Expressions containing `within()` can't all reference the same table.", i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } if (lhs_lower$side == "x") { x <- c(lhs_lower$name, lhs_upper$name) y <- c(rhs_lower$name, rhs_upper$name) condition <- c(">=", "<=") } else { x <- c(rhs_lower$name, rhs_upper$name) y <- c(lhs_lower$name, lhs_upper$name) condition <- c("<=", ">=") } filter <- c("none", "none") list( x = x, y = y, condition = condition, filter = filter ) } eval_join_by_within <- function(expr, error_call) { env <- new_environment() local_error_call(error_call, frame = env) env_poke(env, "within", binding_join_by_within) eval_tidy(expr, env = env) } binding_join_by_within <- function(x_lower, x_upper, y_lower, y_upper) { error_call <- caller_env() x_lower <- enexpr(x_lower) x_upper <- enexpr(x_upper) y_lower <- enexpr(y_lower) y_upper <- enexpr(y_upper) check_missing_arg(x_lower, "x_lower", "within", error_call) check_missing_arg(x_upper, "x_upper", "within", error_call) check_missing_arg(y_lower, "y_lower", "within", error_call) check_missing_arg(y_upper, "y_upper", "within", error_call) list( lhs_lower = x_lower, lhs_upper = x_upper, rhs_lower = y_lower, rhs_upper = y_upper ) } parse_join_by_overlaps <- function(expr, i, error_call) { args <- eval_join_by_overlaps(expr, error_call) lhs_lower <- parse_join_by_name(args$lhs_lower, i, "x", error_call) lhs_upper <- parse_join_by_name(args$lhs_upper, i, "x", error_call) rhs_lower <- parse_join_by_name(args$rhs_lower, i, "y", error_call) rhs_upper <- parse_join_by_name(args$rhs_upper, i, "y", error_call) bounds <- args$bounds if (lhs_lower$side != lhs_upper$side) { message <- c( paste0( "Expressions containing `overlaps()` must reference ", "the same table for the left-hand side lower and upper bounds." 
), i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } if (rhs_lower$side != rhs_upper$side) { message <- c( paste0( "Expressions containing `overlaps()` must reference ", "the same table for the right-hand side lower and upper bounds." ), i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } if (lhs_lower$side == rhs_lower$side) { message <- c( "Expressions containing `overlaps()` can't all reference the same table.", i = glue("Expression {i} is {err_expr(expr)}.") ) abort(message, call = error_call) } # 3 of the `bounds` have the exact same behavior, but the argument name is # consistent with `between(bounds =)` and easier to remember and interpret # than exposing `closed` directly (#6504). # - `[]` uses `<=` and `>=` # - All other conditions use `<` and `>` due to the presence of a `(` or `)` closed <- switch( bounds, "[]" = TRUE, "[)" = FALSE, "(]" = FALSE, "()" = FALSE, abort("Unknown `bounds`.", .internal = TRUE) ) if (lhs_lower$side == "x") { x <- c(lhs_lower$name, lhs_upper$name) y <- c(rhs_upper$name, rhs_lower$name) if (closed) { condition <- c("<=", ">=") } else { condition <- c("<", ">") } } else { x <- c(rhs_upper$name, rhs_lower$name) y <- c(lhs_lower$name, lhs_upper$name) if (closed) { condition <- c(">=", "<=") } else { condition <- c(">", "<") } } filter <- c("none", "none") list( x = x, y = y, condition = condition, filter = filter ) } eval_join_by_overlaps <- function(expr, error_call) { env <- new_environment() local_error_call(error_call, frame = env) env_poke(env, "overlaps", binding_join_by_overlaps) eval_tidy(expr, env = env) } binding_join_by_overlaps <- function(x_lower, x_upper, y_lower, y_upper, ..., bounds = "[]") { error_call <- caller_env() check_join_by_dots_empty(..., fn = "overlaps", call = error_call) x_lower <- enexpr(x_lower) x_upper <- enexpr(x_upper) y_lower <- enexpr(y_lower) y_upper <- enexpr(y_upper) check_missing_arg(x_lower, "x_lower", "overlaps", 
error_call) check_missing_arg(x_upper, "x_upper", "overlaps", error_call) check_missing_arg(y_lower, "y_lower", "overlaps", error_call) check_missing_arg(y_upper, "y_upper", "overlaps", error_call) bounds <- check_bounds(bounds, call = error_call) list( lhs_lower = x_lower, lhs_upper = x_upper, rhs_lower = y_lower, rhs_upper = y_upper, bounds = bounds ) } check_bounds <- function(bounds, call) { arg_match0( bounds, values = c("[]", "[)", "(]", "()"), error_call = call ) } check_join_by_dots_empty <- function(..., fn, call) { if (dots_n(...) == 0L) { return() } fn <- glue::backtick(glue("{fn}()")) message <- c( "`...` must be empty.", i = glue("Non-empty dots were detected inside {fn}.") ) abort(message, call = call) } check_missing_arg <- function(arg, arg_name, fn_name, error_call, ..., binary_op = FALSE) { check_dots_empty0(...) if (!is_missing(arg)) { return(invisible()) } if (!binary_op) { fn_name <- glue("{fn_name}()") } arg_name <- glue::backtick(arg_name) fn_name <- glue::backtick(fn_name) message <- c( glue("Expressions using {fn_name} can't contain missing arguments."), x = glue("Argument {arg_name} is missing.") ) abort(message, call = error_call) } is_symbol_or_string <- function(x) { is_symbol(x) || is_string(x) } err_expr <- function(expr) { expr <- expr_deparse(expr) expr <- glue::backtick(expr) expr } dplyr/R/import-standalone-purrr.R0000644000176200001440000001265514406402754016557 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-purrr.R # last-updated: 2022-06-07 # license: https://unlicense.org # --- # # This file provides a minimal shim to provide a purrr-like API on top of # base R functions. They are not drop-in replacements but allow a similar style # of programming. # # ## Changelog # # 2022-06-07: # * `transpose()` is now more consistent with purrr when inner names # are not congruent (#1346). 
# purrr-style parallel map over two inputs; always returns a list.
# Names follow `.x` when the output has the same length, otherwise they
# are dropped (matches purrr's behavior for non-congruent outputs).
map2 <- function(.x, .y, .f, ...) {
  fn <- as_function(.f, env = global_env())
  results <- mapply(fn, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE)
  out_names <- if (length(results) == length(.x)) names(.x) else NULL
  set_names(results, out_names)
}
# Recycle length-1 elements of `args` up to the common (maximum) length.
# Errors via `stopifnot()` if any element's length is neither 1 nor the
# maximum, since such inputs cannot be recycled unambiguously.
.rlang_purrr_args_recycle <- function(args) {
  lengths <- map_int(args, length)
  n <- max(lengths)

  stopifnot(all(lengths == 1L | lengths == n))

  # Only scalar elements need expanding; everything else is already size `n`
  scalars <- which(lengths == 1L)
  args[scalars] <- lapply(args[scalars], rep.int, times = n)

  args
}
# Iteration order for `detect()`/`detect_index()`: forward positions by
# default, reversed when scanning from the right.
.rlang_purrr_index <- function(x, right = FALSE) {
  positions <- seq_along(x)
  if (right) rev(positions) else positions
}
# Is `.x` something `bind_rows()` can treat as a row?
# TRUE for actual data frames, and for named vectors recognised by vctrs.
dataframe_ish <- function(.x) {
  if (is.data.frame(.x)) {
    return(TRUE)
  }
  vec_is(.x) && is_named(.x)
}
do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-obj-type.R # last-updated: 2022-10-04 # license: https://unlicense.org # --- # # ## Changelog # # 2022-10-04: # - `obj_type_friendly(value = TRUE)` now shows numeric scalars # literally. # - `stop_friendly_type()` now takes `show_value`, passed to # `obj_type_friendly()` as the `value` argument. # # 2022-10-03: # - Added `allow_na` and `allow_null` arguments. # - `NULL` is now backticked. # - Better friendly type for infinities and `NaN`. # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Prefixed usage of rlang functions with `rlang::`. # # 2022-06-22: # - `friendly_type_of()` is now `obj_type_friendly()`. # - Added `obj_type_oo()`. # # 2021-12-20: # - Added support for scalar values and empty vectors. # - Added `stop_input_type()` # # 2021-06-30: # - Added support for missing arguments. # # 2021-04-19: # - Added support for matrices and arrays (#141). # - Added documentation. # - Added changelog. # # nocov start #' Return English-friendly type #' @param x Any R object. #' @param value Whether to describe the value of `x`. Special values #' like `NA` or `""` are always described. #' @param length Whether to mention the length of vectors and lists. #' @return A string describing the type. Starts with an indefinite #' article, e.g. "an integer vector". 
#' @noRd obj_type_friendly <- function(x, value = TRUE) { if (is_missing(x)) { return("absent") } if (is.object(x)) { if (inherits(x, "quosure")) { type <- "quosure" } else { type <- paste(class(x), collapse = "/") } return(sprintf("a <%s> object", type)) } if (!is_vector(x)) { return(.rlang_as_friendly_type(typeof(x))) } n_dim <- length(dim(x)) if (!n_dim) { if (!is_list(x) && length(x) == 1) { if (is_na(x)) { return(switch( typeof(x), logical = "`NA`", integer = "an integer `NA`", double = if (is.nan(x)) { "`NaN`" } else { "a numeric `NA`" }, complex = "a complex `NA`", character = "a character `NA`", .rlang_stop_unexpected_typeof(x) )) } show_infinites <- function(x) { if (x > 0) { "`Inf`" } else { "`-Inf`" } } str_encode <- function(x, width = 30, ...) { if (nchar(x) > width) { x <- substr(x, 1, width - 3) x <- paste0(x, "...") } encodeString(x, ...) } if (value) { if (is.numeric(x) && is.infinite(x)) { return(show_infinites(x)) } if (is.numeric(x) || is.complex(x)) { number <- as.character(round(x, 2)) what <- if (is.complex(x)) "the complex number" else "the number" return(paste(what, number)) } return(switch( typeof(x), logical = if (x) "`TRUE`" else "`FALSE`", character = { what <- if (nzchar(x)) "the string" else "the empty string" paste(what, str_encode(x, quote = "\"")) }, raw = paste("the raw value", as.character(x)), .rlang_stop_unexpected_typeof(x) )) } return(switch( typeof(x), logical = "a logical value", integer = "an integer", double = if (is.infinite(x)) show_infinites(x) else "a number", complex = "a complex number", character = if (nzchar(x)) "a string" else "\"\"", raw = "a raw value", .rlang_stop_unexpected_typeof(x) )) } if (length(x) == 0) { return(switch( typeof(x), logical = "an empty logical vector", integer = "an empty integer vector", double = "an empty numeric vector", complex = "an empty complex vector", character = "an empty character vector", raw = "an empty raw vector", list = "an empty list", .rlang_stop_unexpected_typeof(x) )) } 
# Map a `typeof()` string to an English-friendly description with an
# indefinite article (e.g. "closure" -> "a function"). Unknown types are
# returned unchanged.
.rlang_as_friendly_type <- function(type) {
  friendly <- c(
    "list" = "a list",
    "NULL" = "`NULL`",
    "environment" = "an environment",
    "externalptr" = "a pointer",
    "weakref" = "a weak reference",
    "S4" = "an S4 object",
    "name" = "a symbol",
    "symbol" = "a symbol",
    "language" = "a call",
    "pairlist" = "a pairlist node",
    "expression" = "an expression vector",
    "char" = "an internal string",
    "promise" = "an internal promise",
    "..." = "an internal dots object",
    "any" = "an internal `any` object",
    "bytecode" = "an internal bytecode object",
    "primitive" = "a primitive function",
    "builtin" = "a primitive function",
    "special" = "a primitive function",
    "closure" = "a function"
  )
  if (type %in% names(friendly)) {
    friendly[[type]]
  } else {
    # Fall back to the raw `typeof()` string for anything unrecognised
    type
  }
}
# Join `chr` into an English enumeration: "a", "a or b", "a, b, or c".
# Length 0 and 1 inputs are returned unchanged. The Oxford comma is used
# only when there are three or more items.
oxford_comma <- function(chr, sep = ", ", final = "or") {
  n <- length(chr)
  if (n < 2) {
    return(chr)
  }

  leading <- paste(chr[-n], collapse = sep)
  last <- chr[[n]]

  # "a or b" for two items, "a, b, or c" for three or more
  connector <- if (n > 2) paste0(sep, final, " ") else paste0(" ", final, " ")
  paste0(leading, connector, last)
}
Each #' case is evaluated sequentially and the first match for each element #' determines the corresponding value in the output vector. If no cases match, #' the `.default` is used as a final "else" statement.
#' #' @return A vector with the same size as the common size computed from the #' inputs in `...` and the same type as the common type of the RHS inputs #' in `...`. #' #' @seealso [case_match()] #' #' @export #' @examples #' x <- 1:70 #' case_when( #' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' .default = as.character(x) #' ) #' #' # Like an if statement, the arguments are evaluated in order, so you must #' # proceed from the most specific to the most general. This won't work: #' case_when( #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' x %% 35 == 0 ~ "fizz buzz", #' .default = as.character(x) #' ) #' #' # If none of the cases match and no `.default` is supplied, NA is used: #' case_when( #' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' ) #' #' # Note that `NA` values on the LHS are treated like `FALSE` and will be #' # assigned the `.default` value. You must handle them explicitly if you #' # want to use a different value. The exact way to handle missing values is #' # dependent on the set of LHS conditions you use. #' x[2:4] <- NA_real_ #' case_when( #' x %% 35 == 0 ~ "fizz buzz", #' x %% 5 == 0 ~ "fizz", #' x %% 7 == 0 ~ "buzz", #' is.na(x) ~ "nope", #' .default = as.character(x) #' ) #' #' # `case_when()` evaluates all RHS expressions, and then constructs its #' # result by extracting the selected (via the LHS expressions) parts. #' # In particular `NaN`s are produced in this case: #' y <- seq(-2, 2, by = .5) #' case_when( #' y >= 0 ~ sqrt(y), #' .default = y #' ) #' #' # `case_when()` is particularly useful inside `mutate()` when you want to #' # create a new variable that relies on a complex combination of existing #' # variables #' starwars %>% #' select(name:mass, gender, species) %>% #' mutate( #' type = case_when( #' height > 200 | mass > 200 ~ "large", #' species == "Droid" ~ "robot", #' .default = "other" #' ) #' ) #' #' #' # `case_when()` is not a tidy eval function. 
If you'd like to reuse #' # the same patterns, extract the `case_when()` call in a normal #' # function: #' case_character_type <- function(height, mass, species) { #' case_when( #' height > 200 | mass > 200 ~ "large", #' species == "Droid" ~ "robot", #' .default = "other" #' ) #' } #' #' case_character_type(150, 250, "Droid") #' case_character_type(150, 150, "Droid") #' #' # Such functions can be used inside `mutate()` as well: #' starwars %>% #' mutate(type = case_character_type(height, mass, species)) %>% #' pull(type) #' #' # `case_when()` ignores `NULL` inputs. This is useful when you'd #' # like to use a pattern only under certain conditions. Here we'll #' # take advantage of the fact that `if` returns `NULL` when there is #' # no `else` clause: #' case_character_type <- function(height, mass, species, robots = TRUE) { #' case_when( #' height > 200 | mass > 200 ~ "large", #' if (robots) species == "Droid" ~ "robot", #' .default = "other" #' ) #' } #' #' starwars %>% #' mutate(type = case_character_type(height, mass, species, robots = FALSE)) %>% #' pull(type) case_when <- function(..., .default = NULL, .ptype = NULL, .size = NULL) { args <- list2(...) args <- case_formula_evaluate( args = args, default_env = caller_env(), dots_env = current_env(), error_call = current_env() ) conditions <- args$lhs values <- args$rhs # `case_when()`'s formula interface finds the common size of ALL of its inputs. # This is what allows `TRUE ~` to work. 
.size <- vec_size_common(!!!conditions, !!!values, .size = .size) conditions <- vec_recycle_common(!!!conditions, .size = .size) values <- vec_recycle_common(!!!values, .size = .size) vec_case_when( conditions = conditions, values = values, conditions_arg = "", values_arg = "", default = .default, default_arg = ".default", ptype = .ptype, size = .size, call = current_env() ) } case_formula_evaluate <- function(args, default_env, dots_env, error_call) { # `case_when()`'s formula interface compacts `NULL`s args <- compact_null(args) n_args <- length(args) seq_args <- seq_len(n_args) pairs <- map2( .x = args, .y = seq_args, .f = function(x, i) { validate_and_split_formula( x = x, i = i, default_env = default_env, dots_env = dots_env, error_call = error_call ) } ) lhs <- vector("list", n_args) rhs <- vector("list", n_args) env_error_info <- new_environment() # Using 1 call to `withCallingHandlers()` that wraps all `eval_tidy()` # evaluations to avoid repeated handler setup (#6674) withCallingHandlers( for (i in seq_args) { env_error_info[["i"]] <- i pair <- pairs[[i]] env_error_info[["side"]] <- "left" elt_lhs <- eval_tidy(pair$lhs, env = default_env) env_error_info[["side"]] <- "right" elt_rhs <- eval_tidy(pair$rhs, env = default_env) if (!is.null(elt_lhs)) { lhs[[i]] <- elt_lhs } if (!is.null(elt_rhs)) { rhs[[i]] <- elt_rhs } }, error = function(cnd) { message <- glue::glue_data( env_error_info, "Failed to evaluate the {side}-hand side of formula {i}." ) abort(message, parent = cnd, call = error_call) } ) # TODO: Ideally we'd name the lhs/rhs values with their `as_label()`-ed # expressions. But `as_label()` is much too slow for that to be useful in # a grouped `mutate()`. We need a way to add ALTREP lazy names that only get # materialized on demand (i.e. on error). Until then, we fall back to the # positional names (like `..1` or `..3`) with info about left/right (#6674). # # # Add the expressions as names for `lhs` and `rhs` for nice errors. 
# Check that `x` is a two-sided formula (unwrapping a quosure first when
# needed) and split it into separately evaluable LHS/RHS quosures.
#
# @param x A formula, or a quosure assumed to wrap one, from `case_when()`'s
#   `...`.
# @param i Position of `x` in the original `...`; used in error messages.
# @param default_env Environment to attach when the formula carries none.
# @param dots_env Environment holding the original `...`; used to recover
#   the user's expression for the error label.
# @param error_call Call to blame in error conditions.
#
# Returns a list with `lhs` and `rhs` quosures sharing the formula's
# environment (or `default_env` as a fallback).
validate_and_split_formula <- function(x, i, default_env, dots_env, error_call) {
  if (is_quosure(x)) {
    # We specially handle quosures, assuming they hold formulas
    default_env <- quo_get_env(x)
    x <- quo_get_expr(x)
  }

  if (!is_formula(x, lhs = TRUE)) {
    # Recover the user's original expression so the error can name it
    arg <- substitute(...(), dots_env)[[i]]
    arg <- glue::backtick(as_label(arg))

    # Distinguish "one-sided formula" from "not a formula at all"
    if (is_formula(x)) {
      type <- "a two-sided formula"
    } else {
      type <- glue("a two-sided formula, not {obj_type_friendly(x)}")
    }

    message <- glue("Case {i} ({arg}) must be {type}.")
    abort(message, call = error_call)
  }

  # Formula might be unevaluated, e.g. if it's been quosured
  env <- f_env(x) %||% default_env

  list(
    lhs = new_quosure(f_lhs(x), env),
    rhs = new_quosure(f_rhs(x), env)
  )
}
## makes each individual name syntactic
## does not enforce unique-ness
#
# Fix: the PCRE named capture groups in `regex` had their `<name>` markers
# stripped (leaving invalid patterns like `"^(?[.]{0,2})"`). They are
# required: `re_match()` names its result columns after the capture groups,
# and the code below reads `re$leading_dots`, `re$numbers` and
# `re$leftovers`.
make_syntactic <- function(name) {
  name[is.na(name)] <- ""
  name[name == ""] <- "."
  name[name == "..."] <- "...."
  name <- sub("^_", "._", name)

  new_name <- make.names(name)

  # Decline make.names()'s "X" prefix when the original had none
  X_prefix <- grepl("^X", new_name) & !grepl("^X", name)
  new_name[X_prefix] <- sub("^X", "", new_name[X_prefix])

  # Turn a "." appended by make.names() into a leading "."
  dot_suffix <- which(new_name == paste0(name, "."))
  new_name[dot_suffix] <- sub("^(.*)[.]$", ".\\1", new_name[dot_suffix])

  ## illegal characters have been replaced with '.' via make.names()
  ## however, we have:
  ##   * declined its addition of 'X' prefixes
  ##   * turned its '.' suffixes to '.' prefixes
  regex <- paste0(
    "^(?<leading_dots>[.]{0,2})",
    "(?<numbers>[0-9]*)",
    "(?<leftovers>[^0-9]?.*$)"
  )
  re <- re_match(new_name, pattern = regex)

  # Names that start with digits need a ".." (or "...") prefix to be
  # syntactic, depending on whether anything follows the digits
  needs_dots <- which(re$numbers != "")
  needs_third_dot <- (re$leftovers[needs_dots] == "")
  re$leading_dots[needs_dots] <- ifelse(needs_third_dot, "...", "..")
  new_name <- paste0(re$leading_dots, re$numbers, re$leftovers)

  new_name
}
# A better version (with far more dependencies) exists in msg-format.R
#
# Format `header` plus bulleted problem lines into one message string.
# At most 6 bullets are shown; when there are more, the sixth bullet is
# replaced by "..." and the rest are dropped. `.problem` is accepted for
# call-site compatibility but is currently unused.
#
# Fix: removed the dead local `n_more`, which was computed but never used.
bullets <- function(header, ..., .problem) {
  problems <- c(...)
  MAX_BULLETS <- 6L
  if (length(problems) >= MAX_BULLETS) {
    # Truncate: the last visible bullet becomes an ellipsis marker
    problems[[MAX_BULLETS]] <- "..."
    length(problems) <- MAX_BULLETS
  }

  paste0(
    header, "\n",
    paste0("* ", problems, collapse = "\n")
  )
}
#' #' @details #' Use `group_modify()` when `summarize()` is too limited, in terms of what you need #' to do and return for each group. `group_modify()` is good for "data frame in, data frame out". #' If that is too limited, you need to use a [nested][group_nest()] or [split][group_split()] workflow. #' `group_modify()` is an evolution of [do()], if you have used that before. #' #' Each conceptual group of the data frame is exposed to the function `.f` with two pieces of information: #' #' - The subset of the data for the group, exposed as `.x`. #' - The key, a tibble with exactly one row and columns for each grouping variable, exposed as `.y`. #' #' For completeness, `group_modify()`, `group_map` and `group_walk()` also work on #' ungrouped data frames, in that case the function is applied to the #' entire data frame (exposed as `.x`), and `.y` is a one row tibble with no #' column, consistently with [group_keys()]. #' #' @family grouping functions #' #' @param .data A grouped tibble #' @param .f A function or formula to apply to each group. #' #' If a __function__, it is used as is. It should have at least 2 formal arguments. #' #' If a __formula__, e.g. `~ head(.x)`, it is converted to a function. #' #' In the formula, you can use #' #' - `.` or `.x` to refer to the subset of rows of `.tbl` #' for the given group #' #' - `.y` to refer to the key, a one row tibble with one column per grouping variable #' that identifies the group #' #' @param ... Additional arguments passed on to `.f` #' @param .keep are the grouping variables kept in `.x` #' #' @return #' - `group_modify()` returns a grouped tibble. In that case `.f` must return a data frame. #' - `group_map()` returns a list of results from calling `.f` on each group. #' - `group_walk()` calls `.f` for side effects and returns the input `.tbl`, invisibly. 
#'
#' @examples
#'
#' # return a list
#' mtcars %>%
#'   group_by(cyl) %>%
#'   group_map(~ head(.x, 2L))
#'
#' # return a tibble grouped by `cyl` with 2 rows per group
#' # the grouping data is recalculated
#' mtcars %>%
#'   group_by(cyl) %>%
#'   group_modify(~ head(.x, 2L))
#' @examplesIf requireNamespace("broom", quietly = TRUE)
#'
#' # a list of tibbles
#' iris %>%
#'   group_by(Species) %>%
#'   group_map(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x)))
#'
#' # a restructured grouped tibble
#' iris %>%
#'   group_by(Species) %>%
#'   group_modify(~ broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x)))
#' @examples
#'
#' # a list of vectors
#' iris %>%
#'   group_by(Species) %>%
#'   group_map(~ quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)))
#'
#' # to use group_modify() the lambda must return a data frame
#' iris %>%
#'   group_by(Species) %>%
#'   group_modify(~ {
#'     quantile(.x$Petal.Length, probs = c(0.25, 0.5, 0.75)) %>%
#'       tibble::enframe(name = "prob", value = "quantile")
#'   })
#'
#' iris %>%
#'   group_by(Species) %>%
#'   group_modify(~ {
#'     .x %>%
#'       purrr::map_dfc(fivenum) %>%
#'       mutate(nms = c("min", "Q1", "median", "Q3", "max"))
#'   })
#'
#' # group_walk() is for side effects
#' dir.create(temp <- tempfile())
#' iris %>%
#'   group_by(Species) %>%
#'   group_walk(~ write.csv(.x, file = file.path(temp, paste0(.y$Species, ".csv"))))
#' list.files(temp, pattern = "csv$")
#' unlink(temp, recursive = TRUE)
#'
#' # group_modify() and ungrouped data frames
#' mtcars %>%
#'   group_modify(~ head(.x, 2L))
#'
#' @export
group_map <- function(.data, .f, ..., .keep = FALSE) {
  lifecycle::signal_stage("experimental", "group_map()")
  UseMethod("group_map")
}

#' @export
group_map.data.frame <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) {
  # `keep` was renamed to `.keep` in dplyr 1.0.0; honour the old name with a
  # deprecation warning.
  if (!missing(keep)) {
    lifecycle::deprecate_warn("1.0.0", "group_map(keep = )", "group_map(.keep = )", always = TRUE)
    .keep <- keep
  }
  .f <- as_group_map_function(.f)

  # call the function on each group
  chunks <- if (is_grouped_df(.data)) {
    group_split(.data, .keep = isTRUE(.keep))
  } else {
    group_split(.data)
  }
  keys <- group_keys(.data)
  group_keys <- map(seq_len(nrow(keys)), function(i) keys[i, , drop = FALSE])

  if (length(chunks)) {
    map2(chunks, group_keys, .f, ...)
  } else {
    # calling .f with .x and .y set to prototypes
    structure(list(), ptype = .f(attr(chunks, "ptype"), keys[integer(0L), ], ...))
  }
}

#' @rdname group_map
#' @export
group_modify <- function(.data, .f, ..., .keep = FALSE) {
  # FIX: the original signalled "group_map()" here, mislabelling the
  # experimental-stage message shown to callers of group_modify().
  lifecycle::signal_stage("experimental", "group_modify()")
  UseMethod("group_modify")
}

#' @export
group_modify.data.frame <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) {
  if (!missing(keep)) {
    lifecycle::deprecate_warn("1.0.0", "group_modify(keep = )", "group_modify(.keep = )", always = TRUE)
    .keep <- keep
  }
  .f <- as_group_map_function(.f)
  # Ungrouped data: `.f` sees the whole data frame and an empty key tibble.
  .f(.data, group_keys(.data), ...)
}

#' @export
group_modify.grouped_df <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) {
  if (!missing(keep)) {
    lifecycle::deprecate_warn("1.0.0", "group_modify(keep = )", "group_modify(.keep = )", always = TRUE)
    .keep <- keep
  }
  tbl_group_vars <- group_vars(.data)
  .f <- as_group_map_function(.f)
  error_call <- current_env()

  # Wrap `.f` so that each per-group result is validated (must be a data
  # frame, must not redefine the grouping variables) and then prefixed with
  # the group key, recycled to the result's row count.
  fun <- function(.x, .y) {
    res <- .f(.x, .y, ...)
    if (!inherits(res, "data.frame")) {
      abort("The result of `.f` must be a data frame.", call = error_call)
    }
    if (any(bad <- names(res) %in% tbl_group_vars)) {
      msg <- glue(
        "The returned data frame cannot contain the original grouping variables: {names}.",
        names = paste(names(res)[bad], collapse = ", ")
      )
      abort(msg, call = error_call)
    }
    bind_cols(.y[rep(1L, nrow(res)), , drop = FALSE], res)
  }

  chunks <- group_map(.data, fun, .keep = .keep)
  res <- if (length(chunks) > 0L) {
    bind_rows(!!!chunks)
  } else {
    # zero groups: fall back to the prototype computed by group_map()
    attr(chunks, "ptype")
  }

  # The grouping data is recalculated on the combined result.
  grouped_df(res, group_vars(.data), group_by_drop_default(.data))
}

#' @export
#' @rdname group_map
group_walk <- function(.data, .f, ..., .keep = FALSE) {
  lifecycle::signal_stage("experimental", "group_walk()")
  group_map(.data, .f, ..., .keep = .keep)
  invisible(.data)
}
dplyr/R/grouped-df.R0000644000176200001440000002044514366556340014005 0ustar liggesusers#' A grouped data frame.
#'
#' @description
#' The easiest way to create a grouped data frame is to call the `group_by()`
#' method on a data frame or tbl: this will take care of capturing
#' the unevaluated expressions for you.
#'
#' These functions are designed for programmatic use. For data analysis
#' purposes see [group_data()] for the accessor functions that retrieve
#' various metadata from a grouped data frames.
#'
#' @keywords internal
#' @param data a tbl or data frame.
#' @param vars A character vector.
#' @param drop When `.drop = TRUE`, empty groups are dropped.
#'
#' @export
grouped_df <- function(data, vars, drop = group_by_drop_default(data)) {
  # Validate inputs with guard clauses before doing any work.
  if (!is.data.frame(data)) {
    abort("`data` must be a data frame.")
  }
  if (!is.character(vars)) {
    abort("`vars` must be a character vector.")
  }
  # No grouping variables means there is nothing to group by.
  if (length(vars) == 0) {
    return(as_tibble(data))
  }
  new_grouped_df(data, compute_groups(data, vars, drop = drop))
}

compute_groups <- function(data, vars, drop = FALSE) {
  unknown <- setdiff(vars, names(data))
  if (length(unknown) > 0) {
    # `vars` is deliberately reassigned: the glue template below
    # interpolates `{vars}`.
    vars <- paste0(encodeString(vars, quote = "`"), collapse = ", ")
    abort(glue("`vars` missing from `data`: {vars}."))
  }

  # Only train the dictionary based on selected columns
  group_vars <- as_tibble(data)[vars]
  sorted <- dplyr_locate_sorted_groups(group_vars)
  old_keys <- sorted$key
  old_rows <- sorted$loc

  # Let count_regroups()/show_regroups() observe this regrouping.
  signal("", class = "dplyr_regroup")

  groups <- tibble(!!!old_keys, ".rows" := old_rows)

  if (!isTRUE(drop) && any(map_lgl(old_keys, is.factor))) {
    # Extra work is needed to auto expand empty (factor-level) groups.
    unique_vals <- map(old_keys, function(key) {
      if (is.factor(key)) key else vec_unique(key)
    })

    # Internally we only work with integers, so grouping columns that are
    # not factors are matched against their unique values.
    key_positions <- map2(old_keys, unique_vals, function(key, values) {
      if (is.factor(key)) key else vec_match(key, values)
    })

    # expand_groups() recursively adds empty groups and returns:
    # - indices: how to vec_slice the current keys to get the new keys
    # - rows: the new row list, with integer(0) entries for empty groups
    expansion <- expand_groups(groups, key_positions, vec_size(old_keys))

    # Rebuild the key columns from the old keys and the new indices.
    new_keys <- pmap(
      list(old_keys, expansion$indices, unique_vals),
      function(key, index, values) {
        if (is.factor(key)) {
          if (is.ordered(key)) {
            new_ordered(index, levels = levels(key))
          } else {
            new_factor(index, levels = levels(key))
          }
        } else {
          vec_slice(values, index)
        }
      }
    )
    names(new_keys) <- vars

    groups <- tibble(!!!new_keys, ".rows" := expansion$rows)
  }

  attr(groups, ".drop") <- drop
  groups
}

# Count how many "dplyr_regroup" conditions `code` signals.
count_regroups <- function(code) {
  n <- 0
  withCallingHandlers(code, dplyr_regroup = function(cnd) {
    n <<- n + 1
  })
  n
}

# Print a marker every time `code` triggers a regrouping.
show_regroups <- function(code) {
  withCallingHandlers(code, dplyr_regroup = function(cnd) {
    cat("Regrouping...\n")
  })
}

#' Low-level construction and validation for the grouped_df and rowwise_df classes
#'
#' `new_grouped_df()` and `new_rowwise_df()` are constructors designed to be high-performance so only
#' check types, not values. This means it is the caller's responsibility
#' to create valid values, and hence this is for expert use only.
#'
#' @param x A data frame
#' @param groups The grouped structure, `groups` should be a data frame.
#' Its last column should be called `.rows` and be
#' a list of 1 based integer vectors that all are between 1 and the number of rows of `.data`.
#' @param class additional class, will be prepended to canonical classes.
#' @param ...
#'   additional attributes
#'
#' @examples
#' # 5 bootstrap samples
#' tbl <- new_grouped_df(
#'   tibble(x = rnorm(10)),
#'   groups = tibble(".rows" := replicate(5, sample(1:10, replace = TRUE), simplify = FALSE))
#' )
#' # mean of each bootstrap sample
#' summarise(tbl, x = mean(x))
#'
#' @keywords internal
#' @export
new_grouped_df <- function(x, groups, ..., class = character()) {
  # Fast constructor: type checks only, no value validation.
  if (!is.data.frame(x)) {
    abort("`x` must be a data frame.")
  }
  if (!is.data.frame(groups)) {
    abort("`groups` must be a data frame")
  }
  if (tail(names(groups), 1L) != ".rows") {
    abort('The last column of `groups` must be called ".rows".')
  }
  new_tibble(
    x,
    groups = groups,
    ...,
    nrow = NROW(x),
    class = c(class, "grouped_df")
  )
}

#' @description
#' `validate_grouped_df()` and `validate_rowwise_df()` validate the attributes
#' of a `grouped_df` or a `rowwise_df`.
#'
#' @param check_bounds whether to check all indices for out of bounds problems in `grouped_df` objects
#' @rdname new_grouped_df
#' @export
validate_grouped_df <- function(x, check_bounds = FALSE) {
  # Objects grouped with dplyr < 0.8.0 carried a "vars" attribute instead of
  # "groups"; refuse to validate those.
  if (is.null(attr(x, "groups")) && !is.null(attr(x, "vars"))) {
    bullets <- c(
      "Corrupt `grouped_df` using old (< 0.8.0) format.",
      i = "Strip off old grouping with `ungroup()`."
    )
    abort(bullets)
  }

  # The C routine returns NULL on success, or an error message.
  result <- .Call(`dplyr_validate_grouped_df`, x, check_bounds)
  if (!is.null(result)) {
    abort(result)
  }
  x
}

setOldClass(c("grouped_df", "tbl_df", "tbl", "data.frame"))

#' @rdname grouped_df
#' @export
is.grouped_df <- function(x) inherits(x, "grouped_df")

#' @rdname grouped_df
#' @export
is_grouped_df <- is.grouped_df

# One-line group summary used in tibble headers, e.g. "cyl [3]".
group_sum <- function(x) {
  grps <- n_groups(x)
  paste0(commas(group_vars(x)), " [", big_mark(grps), "]")
}

#' @importFrom pillar tbl_sum
#' @export
tbl_sum.grouped_df <- function(x, ...) {
  c(
    NextMethod(),
    c("Groups" = group_sum(x))
  )
}

#' @export
as.data.frame.grouped_df <- function(x, row.names = NULL, optional = FALSE, ...) {
  # Strips grouping (and all other) attributes.
  new_data_frame(vec_data(x), n = nrow(x))
}

#' @export
as_tibble.grouped_df <- function(x, ...) {
  new_tibble(vec_data(x), nrow = nrow(x))
}

#' @export
`[.grouped_df` <- function(x, i, j, drop = FALSE) {
  out <- NextMethod()
  if (!is.data.frame(out)) {
    return(out)
  }
  if (drop) {
    return(as_tibble(out))
  }

  kept <- group_intersect(x, out)
  if ((missing(i) || nargs() == 2) && identical(kept, group_vars(x))) {
    # Column-only subsetting that keeps every grouping column can reuse the
    # existing group data without recomputing it.
    new_grouped_df(out, group_data(x))
  } else {
    grouped_df(out, kept, group_by_drop_default(x))
  }
}

#' @export
`$<-.grouped_df` <- function(x, name, ..., value) {
  out <- NextMethod()
  if (name %in% group_vars(x)) {
    # A grouping column changed: groups must be recomputed.
    grouped_df(out, group_intersect(x, out), group_by_drop_default(x))
  } else {
    out
  }
}

#' @export
`[<-.grouped_df` <- function(x, i, j, ..., value) {
  out <- NextMethod()
  grouped_df(out, group_intersect(x, out), group_by_drop_default(x))
}

#' @export
`[[<-.grouped_df` <- function(x, ..., value) {
  out <- NextMethod()
  grouped_df(out, group_intersect(x, out), group_by_drop_default(x))
}

#' @export
`names<-.grouped_df` <- function(x, value) {
  data <- as.data.frame(x)
  names(data) <- value

  # Rename the key columns of the group data to stay in sync.
  groups <- group_data(x)
  group_loc <- match(intersect(names(groups), names(x)), names(x))
  group_names <- c(value[group_loc], ".rows")
  if (!identical(group_names, names(groups))) {
    names(groups) <- group_names
  }

  new_grouped_df(data, groups)
}

#' @method rbind grouped_df
#' @export
rbind.grouped_df <- function(...) {
  bind_rows(...)
}

#' @method cbind grouped_df
#' @export
cbind.grouped_df <- function(...) {
  bind_cols(...)
}

# Drop empty groups from a group data frame, unless `preserve` is TRUE.
group_data_trim <- function(group_data, preserve = FALSE) {
  if (preserve) {
    return(group_data)
  }
  keep <- lengths(group_data$".rows") > 0
  group_data[keep, , drop = FALSE]
}

# Helpers -----------------------------------------------------------------

expand_groups <- function(old_groups, positions, nr) {
  .Call(`dplyr_expand_groups`, old_groups, positions, nr)
}

dplyr_locate_sorted_groups <- function(x) {
  out <- vec_locate_sorted_groups(x, nan_distinct = TRUE)
  out$loc <- new_list_of(out$loc, ptype = integer())
  if (dplyr_legacy_locale()) {
    # Temporary legacy support for respecting the system locale.
    # Matches legacy `arrange()` ordering.
    out <- vec_slice(out, dplyr_order_legacy(out$key))
  }
  out
}

# Grouping variables of `x` that survive in `new`.
group_intersect <- function(x, new) {
  intersect(group_vars(x), names(new))
}
dplyr/R/defunct.R0000644000176200001440000000511614366556340013377 0ustar liggesusers#' Defunct functions
#'
#' @description
#' `r lifecycle::badge("defunct")`
#'
#' These functions were deprecated for at least two years before being
#' made defunct. If there's a known replacement, calling the function
#' will tell you about it.
#'
#' @keywords internal
#' @name defunct
NULL

#' @usage # Deprecated in 0.5.0 -------------------------------------
#' @name defunct
NULL

#' @export
#' @rdname defunct
id <- function(.variables, drop = FALSE) {
  lifecycle::deprecate_stop("0.5.0", "id()", "vctrs::vec_group_id()")
}

#' @usage # Deprecated in 0.7.0 -------------------------------------
#' @name defunct
NULL

#' @export
#' @rdname defunct
failwith <- function(default = NULL, f, quiet = FALSE) {
  lifecycle::deprecate_stop("0.7.0", "failwith()", "purrr::possibly()")
}

#' @usage # Deprecated in 0.8.* -------------------------------------
#' @name defunct
NULL

#' @export
#' @rdname defunct
select_vars <- function(vars = chr(), ..., include = chr(), exclude = chr()) {
  lifecycle::deprecate_stop("0.8.4", "select_vars()", "tidyselect::vars_select()")
}

#' @export
#' @rdname defunct
rename_vars <- function(vars = chr(), ..., strict = TRUE) {
  lifecycle::deprecate_stop("0.8.4", "rename_vars()", "tidyselect::vars_rename()")
}

#' @export
#' @rdname defunct
select_var <- function(vars, var = -1) {
  lifecycle::deprecate_stop("0.8.4", "select_var()", "tidyselect::vars_pull()")
}

#' @export
#' @rdname defunct
current_vars <- function(...) {
  lifecycle::deprecate_stop("0.8.4", "current_vars()", "tidyselect::peek_vars()")
}

#' @usage # Deprecated in 1.0.0 -------------------------------------
#' @name defunct
NULL

#' @export
#' @rdname defunct
bench_tbls <- function(tbls, op, ..., times = 10) {
  lifecycle::deprecate_stop("1.0.0", "bench_tbls()")
}

#' @export
#' @rdname defunct
compare_tbls <- function(tbls, op, ref = NULL, compare = equal_data_frame, ...) {
  lifecycle::deprecate_stop("1.0.0", "compare_tbls()")
}

#' @export
#' @rdname defunct
compare_tbls2 <- function(tbls_x, tbls_y, op, ref = NULL, compare = equal_data_frame, ...) {
  lifecycle::deprecate_stop("1.0.0", "compare_tbls2()")
}

#' @export
#' @rdname defunct
eval_tbls <- function(tbls, op) {
  lifecycle::deprecate_stop("1.0.0", "eval_tbls()")
}

#' @export
#' @rdname defunct
eval_tbls2 <- function(tbls_x, tbls_y, op) {
  lifecycle::deprecate_stop("1.0.0", "eval_tbls2()")
}

#' @export
#' @rdname defunct
location <- function(df) {
  lifecycle::deprecate_stop("1.0.0", "location()", "lobstr::ref()")
}

#' @export
#' @rdname defunct
changes <- function(x, y) {
  lifecycle::deprecate_stop("1.0.0", "changes()", "lobstr::ref()")
}
dplyr/R/order-by.R0000644000176200001440000000405514406402754013465 0ustar liggesusers#' A helper function for ordering window function output
#'
#' This function makes it possible to control the ordering of window functions
#' in R that don't have a specific ordering parameter. When translated to SQL
#' it will modify the order clause of the OVER function.
#'
#' This function works by changing the `call` to instead call
#' [with_order()] with the appropriate arguments.
#'
#' @param order_by a vector to order_by
#' @param call a function call to a window function, where the first argument
#'   is the vector being operated on
#' @export
#' @examples
#' order_by(10:1, cumsum(1:10))
#' x <- 10:1
#' y <- 1:10
#' order_by(x, cumsum(y))
#'
#' df <- data.frame(year = 2000:2005, value = (0:5) ^ 2)
#' scrambled <- df[sample(nrow(df)), ]
#'
#' wrong <- mutate(scrambled, running = cumsum(value))
#' arrange(wrong, year)
#'
#' right <- mutate(scrambled, running = order_by(year, cumsum(value)))
#' arrange(right, year)
order_by <- function(order_by, call) {
  quo <- enquo(call)
  expr <- quo_get_expr(quo)

  # `call` must be an actual call; give targeted errors otherwise.
  # NOTE: the glue templates below interpolate the locals `expr` and `type`,
  # so those names must not be changed.
  if (!is_call(expr)) {
    if (is_symbol(expr)) {
      bullets <- c(
        glue("`call` must be a function call, not a symbol."),
        i = glue("Did you mean `arrange({as_label(enquo(order_by))}, {expr})`?")
      )
      abort(bullets)
    }
    type <- obj_type_friendly(expr)
    abort(glue("`call` must be a function call, not { type }."))
  }

  # Rewrite `fn(args...)` into `with_order(order_by, fn, args...)`,
  # quoting each argument in the caller's environment.
  fn <- set_expr(quo, expr[[1]])
  args <- map(expr[-1], new_quosure, quo_get_env(quo))
  eval_tidy(expr(with_order(!!order_by, !!fn, !!!args)))
}

#' Run a function with one order, translating result back to original order
#'
#' This is used to power the ordering parameters of dplyr's window functions
#'
#' @param order_by vector to order by
#' @param fun window function
#' @param x,... arguments to `f`
#' @keywords internal
#' @export
with_order <- function(order_by, fun, x, ...) {
  vec_check_size(order_by, size = vec_size(x))

  # Apply `fun` to `x` sorted by `order_by`, then invert the permutation to
  # put the result back in the original order of `x`.
  ord <- vec_order_radix(order_by)
  out <- fun(vec_slice(x, ord), ...)
  vec_slice(out, vec_order_radix(ord))
}
dplyr/R/mutate.R0000644000176200001440000003733414406402754013247 0ustar liggesusers#' Create, modify, and delete columns
#'
#' `mutate()` creates new columns that are functions of existing variables.
#' It can also modify (if the name is the same as an existing
#' column) and delete columns (by setting their value to `NULL`).
#' #' @section Useful mutate functions: #' #' * [`+`], [`-`], [log()], etc., for their usual mathematical meanings #' #' * [lead()], [lag()] #' #' * [dense_rank()], [min_rank()], [percent_rank()], [row_number()], #' [cume_dist()], [ntile()] #' #' * [cumsum()], [cummean()], [cummin()], [cummax()], [cumany()], [cumall()] #' #' * [na_if()], [coalesce()] #' #' * [if_else()], [recode()], [case_when()] #' #' @section Grouped tibbles: #' #' Because mutating expressions are computed within groups, they may #' yield different results on grouped tibbles. This will be the case #' as soon as an aggregating, lagging, or ranking function is #' involved. Compare this ungrouped mutate: #' #' ``` #' starwars %>% #' select(name, mass, species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' ``` #' #' With the grouped equivalent: #' #' ``` #' starwars %>% #' select(name, mass, species) %>% #' group_by(species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' ``` #' #' The former normalises `mass` by the global average whereas the #' latter normalises by the averages within species levels. #' #' @export #' @inheritParams arrange #' @param ... <[`data-masking`][rlang::args_data_masking]> Name-value pairs. #' The name gives the name of the column in the output. #' #' The value can be: #' #' * A vector of length 1, which will be recycled to the correct length. #' * A vector the same length as the current group (or the whole data frame #' if ungrouped). #' * `NULL`, to remove the column. #' * A data frame or tibble, to create multiple columns in the output. #' @family single table verbs #' @return #' An object of the same type as `.data`. The output has the following #' properties: #' #' * Columns from `.data` will be preserved according to the `.keep` argument. #' * Existing columns that are modified by `...` will always be returned in #' their original location. 
#' * New columns created through `...` will be placed according to the #' `.before` and `.after` arguments. #' * The number of rows is not affected. #' * Columns given the value `NULL` will be removed. #' * Groups will be recomputed if a grouping variable is mutated. #' * Data frame attributes are preserved. #' @section Methods: #' This function is a **generic**, which means that packages can provide #' implementations (methods) for other classes. See the documentation of #' individual methods for extra arguments and differences in behaviour. #' #' Methods available in currently loaded packages: #' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. #' @examples #' # Newly created variables are available immediately #' starwars %>% #' select(name, mass) %>% #' mutate( #' mass2 = mass * 2, #' mass2_squared = mass2 * mass2 #' ) #' #' # As well as adding new variables, you can use mutate() to #' # remove variables and modify existing variables. #' starwars %>% #' select(name, height, mass, homeworld) %>% #' mutate( #' mass = NULL, #' height = height * 0.0328084 # convert to feet #' ) #' #' # Use across() with mutate() to apply a transformation #' # to multiple columns in a tibble. #' starwars %>% #' select(name, homeworld, species) %>% #' mutate(across(!name, as.factor)) #' # see more in ?across #' #' # Window functions are useful for grouped mutates: #' starwars %>% #' select(name, mass, homeworld) %>% #' group_by(homeworld) %>% #' mutate(rank = min_rank(desc(mass))) #' # see `vignette("window-functions")` for more details #' #' # By default, new columns are placed on the far right. #' df <- tibble(x = 1, y = 2) #' df %>% mutate(z = x + y) #' df %>% mutate(z = x + y, .before = 1) #' df %>% mutate(z = x + y, .after = x) #' #' # By default, mutate() keeps all columns from the input data. 
#' df <- tibble(x = 1, y = 2, a = "a", b = "b") #' df %>% mutate(z = x + y, .keep = "all") # the default #' df %>% mutate(z = x + y, .keep = "used") #' df %>% mutate(z = x + y, .keep = "unused") #' df %>% mutate(z = x + y, .keep = "none") #' #' # Grouping ---------------------------------------- #' # The mutate operation may yield different results on grouped #' # tibbles because the expressions are computed within groups. #' # The following normalises `mass` by the global average: #' starwars %>% #' select(name, mass, species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' #' # Whereas this normalises `mass` by the averages within species #' # levels: #' starwars %>% #' select(name, mass, species) %>% #' group_by(species) %>% #' mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) #' #' # Indirection ---------------------------------------- #' # Refer to column names stored as strings with the `.data` pronoun: #' vars <- c("mass", "height") #' mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]]) #' # Learn more in ?rlang::args_data_masking mutate <- function(.data, ...) { UseMethod("mutate") } #' @rdname mutate #' #' @inheritParams args_by #' #' @param .keep #' Control which columns from `.data` are retained in the output. Grouping #' columns and columns created by `...` are always kept. #' #' * `"all"` retains all columns from `.data`. This is the default. #' * `"used"` retains only the columns used in `...` to create new #' columns. This is useful for checking your work, as it displays inputs #' and outputs side-by-side. #' * `"unused"` retains only the columns _not_ used in `...` to create new #' columns. This is useful if you generate new columns, but no longer need #' the columns used to generate them. #' * `"none"` doesn't retain any extra columns from `.data`. Only the grouping #' variables and columns created by `...` are kept. 
#' @param .before,.after
#'   <[`tidy-select`][dplyr_tidy_select]> Optionally, control where new columns
#'   should appear (the default is to add to the right hand side). See
#'   [relocate()] for more details.
#' @export
mutate.data.frame <- function(.data,
                              ...,
                              .by = NULL,
                              .keep = c("all", "used", "unused", "none"),
                              .before = NULL,
                              .after = NULL) {
  keep <- arg_match0(.keep, values = c("all", "used", "unused", "none"))

  # Resolve per-operation grouping, then evaluate the mutate expressions.
  by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data")
  cols <- mutate_cols(.data, dplyr_quosures(...), by)
  used <- attr(cols, "used")

  out <- dplyr_col_modify(.data, cols)

  # Place brand-new columns according to `.before`/`.after`.
  names_original <- names(.data)
  out <- mutate_relocate(
    out = out,
    before = {{ .before }},
    after = {{ .after }},
    names_original = names_original
  )

  # Then prune columns according to `.keep`.
  out <- mutate_keep(
    out = out,
    keep = keep,
    used = used,
    names_new = names(cols),
    names_groups = by$names
  )

  out
}

# Helpers -----------------------------------------------------------------

# Move only the columns that did not exist in the original data; modified
# columns keep their original position.
mutate_relocate <- function(out, before, after, names_original) {
  before <- enquo(before)
  after <- enquo(after)
  if (quo_is_null(before) && quo_is_null(after)) {
    return(out)
  }

  fresh <- setdiff(names(out), names_original)
  relocate(
    out,
    all_of(fresh),
    .before = !!before,
    .after = !!after
  )
}

# Apply the `.keep` policy; new columns and grouping columns are always kept.
mutate_keep <- function(out, keep, used, names_new, names_groups) {
  names <- names(out)
  if (keep == "all") {
    return(dplyr_col_select(out, names))
  }

  names_keep <- switch(
    keep,
    used = names(used)[used],
    unused = names(used)[!used],
    none = character(),
    abort("Unknown `keep`.", .internal = TRUE)
  )
  dplyr_col_select(out, intersect(names, c(names_new, names_groups, names_keep)))
}

mutate_cols <- function(data, dots, by, error_call = caller_env()) {
  # Collect dots before setting up error handlers (#6178)
  force(dots)
  error_call <- dplyr_error_call(error_call)

  mask <- DataMask$new(data, by,
                       "mutate", error_call = error_call)

  # Save/restore the "column" context and drop the mask's caches on exit.
  old_current_column <- context_peek_bare("column")
  on.exit(context_poke("column", old_current_column), add = TRUE)
  on.exit(mask$forget(), add = TRUE)

  new_columns <- set_names(list(), character())
  warnings_state <- env(warnings = list())

  local_error_context(dots, 0L, mask = mask)

  # Evaluate each expression in turn, accumulating results in `new_columns`;
  # errors/warnings are rewrapped with per-expression context.
  withCallingHandlers(
    for (i in seq_along(dots)) {
      poke_error_context(dots, i, mask = mask)
      context_poke("column", old_current_column)
      new_columns <- mutate_col(dots[[i]], data, mask, new_columns)
    },
    error = dplyr_error_handler(
      dots = dots,
      mask = mask,
      bullets = mutate_bullets,
      error_call = error_call,
      error_class = "dplyr:::mutate_error"
    ),
    warning = dplyr_warning_handler(
      state = warnings_state,
      mask = mask,
      error_call = error_call
    )
  )
  signal_warnings(warnings_state, error_call)

  # zap() markers become NULL entries (column removal downstream).
  is_zap <- map_lgl(new_columns, inherits, "rlang_zap")
  new_columns[is_zap] <- rep(list(NULL), sum(is_zap))

  used <- mask$get_used()
  names(used) <- mask$current_vars()
  attr(new_columns, "used") <- used

  new_columns
}

# Evaluate one mutate expression (possibly expanding across()/pick()) and
# fold its result(s) into `new_columns`. Two passes: first compute per-group
# `chunks` and the combined `result` for every expanded quosure, then ingest
# them into the mask and the output list.
mutate_col <- function(dot, data, mask, new_columns) {
  rows <- mask$get_rows()

  # get results from all the quosures that are expanded from ..i
  # then ingest them after
  dot <- expand_pick(dot, mask)
  quosures <- expand_across(dot)
  quosures_results <- vector(mode = "list", length = length(quosures))

  # First pass
  for (k in seq_along(quosures)) {
    quo <- quosures[[k]]
    quo_data <- attr(quo, "dplyr:::data")
    if (!is.null(quo_data$column)) {
      context_poke("column", quo_data$column)
    }
    # a list in which each element is the result of
    # evaluating the quosure in the "sliced data mask"
    # recycling it appropriately to match the group size
    #
    # TODO: reinject hybrid evaluation at the R level
    chunks <- NULL

    # result after unchopping the chunks
    result <- NULL

    if (quo_is_symbol(quo)){
      # Bare-symbol fast path: reuse an already-computed column (or an input
      # column) without re-evaluating per group.
      name <- as_string(quo_get_expr(quo))

      if (name %in% names(new_columns)) {
        # already have result and chunks
        result <- new_columns[[name]]
        chunks <- mask$resolve(name)
      } else if (name %in% names(data)) {
        # column from the original data
        result <- data[[name]]
        chunks <- mask$resolve(name)
      }

      if (mask$is_rowwise() && obj_is_list(result)) {
        # Rowwise: each list element must be size 1 so it can be unlisted.
        sizes <- list_sizes(result)
        wrong <- which(sizes != 1)
        if (length(wrong)) {
          # same error as would have been generated by mask$eval_all_mutate()
          group <- wrong[1L]
          mask$set_current_group(group)
          abort(
            class = c("dplyr:::mutate_incompatible_size", "dplyr:::internal_error"),
            dplyr_error_data = list(result_size = sizes[group], expected_size = 1)
          )
        }
        result_ptype <- attr(result, "ptype", exact = TRUE)
        if (length(result) == 0 && is.null(result_ptype)) {
          # i.e. `vec_ptype_finalise(unspecified())` (#6369)
          result <- logical()
        } else {
          result <- list_unchop(result, ptype = result_ptype)
        }
      }
    } else if (!quo_is_symbolic(quo) && !is.null(quo_get_expr(quo))) {
      # constant, we still need both `result` and `chunks`
      result <- quo_get_expr(quo)
      result <- withCallingHandlers(
        vec_recycle(result, vec_size(data)),
        error = function(cnd) {
          abort(
            class = c("dplyr:::mutate_constant_recycle_error", "dplyr:::internal_error"),
            constant_size = vec_size(result), data_size = vec_size(data)
          )
        }
      )
      chunks <- vec_chop(result, rows)
    }

    if (is.null(chunks)) {
      # General path: evaluate the quosure once per group in the data mask.
      if (is.null(quo_data$column)) {
        chunks <- mask$eval_all_mutate(quo)
      } else {
        # across() sub-expression: wrap errors with the column name.
        chunks <- withCallingHandlers(
          mask$eval_all_mutate(quo),
          error = function(cnd) {
            name <- dplyr_quosure_name(quo_data)
            msg <- glue("Can't compute column `{name}`.")
            abort(msg, call = call("across"), parent = cnd)
          }
        )
      }
    }

    if (is.null(chunks)) {
      next
    }

    # only unchop if needed
    if (is.null(result)) {
      if (length(rows) == 1) {
        result <- chunks[[1]]
      } else {
        # `name` specified lazily
        chunks <- dplyr_vec_cast_common(chunks, name = dplyr_quosure_name(quo_data))
        result <- list_unchop(chunks, indices = rows)
      }
    }

    quosures_results[[k]] <- list(result = result, chunks = chunks)
  }

  # Second pass
  for (k in seq_along(quosures)) {
    quo <- quosures[[k]]
    quo_data <- attr(quo, "dplyr:::data")

    quo_result <- quosures_results[[k]]
    if (is.null(quo_result)) {
      if
(quo_data$is_named) { name <- dplyr_quosure_name(quo_data) new_columns[[name]] <- zap() mask$remove(name) } next } result <- quo_result$result chunks <- quo_result$chunks if (!quo_data$is_named && is.data.frame(result)) { types <- vec_ptype(result) types_names <- names(types) chunks_extracted <- .Call(dplyr_extract_chunks, chunks, types) for (j in seq_along(types)) { mask$add_one(types_names[j], chunks_extracted[[j]], result = result[[j]]) } new_columns[types_names] <- result } else { # treat as a single output otherwise name <- dplyr_quosure_name(quo_data) mask$add_one(name = name, chunks = chunks, result = result) new_columns[[name]] <- result } } new_columns } mutate_bullets <- function(cnd, ...) { UseMethod("mutate_bullets") } #' @export `mutate_bullets.dplyr:::mutate_incompatible_size` <- function(cnd, ...) { label <- ctxt_error_label() result_size <- cnd$dplyr_error_data$result_size expected_size <- cnd$dplyr_error_data$expected_size c( glue("`{label}` must be size {or_1(expected_size)}, not {result_size}."), i = cnd_bullet_rowwise_unlist() ) } #' @export `mutate_bullets.dplyr:::mutate_mixed_null` <- function(cnd, ...) { label <- ctxt_error_label() c( glue("`{label}` must return compatible vectors across groups."), x = "Can't combine NULL and non NULL results.", i = cnd_bullet_rowwise_unlist() ) } #' @export `mutate_bullets.dplyr:::mutate_not_vector` <- function(cnd, ...) { label <- ctxt_error_label() result <- cnd$dplyr_error_data$result c( glue("`{label}` must be a vector, not {obj_type_friendly(result)}."), i = cnd_bullet_rowwise_unlist() ) } #' @export `mutate_bullets.dplyr:::error_incompatible_combine` <- function(cnd, ...) { # the details are included in the parent error c() } #' @export `mutate_bullets.dplyr:::mutate_constant_recycle_error` <- function(cnd, ...) 
{
  # An inlined constant could not be recycled to the size of the data
  label <- ctxt_error_label()
  constant_size <- cnd$constant_size
  data_size <- cnd$data_size

  c(
    glue("Inlined constant `{label}` must be size {or_1(data_size)}, not {constant_size}.")
  )
}

# Report whether `cnd` gets muffled: re-signal the condition with a
# `muffleWarning` restart installed and return TRUE if some handler
# invoked that restart, FALSE otherwise.
check_muffled_warning <- function(cnd) {
  early_exit <- TRUE

  # Cancel early exits, e.g. from an exiting handler. This way we can
  # still instrument caught warnings to avoid confusing
  # inconsistencies. This doesn't work on versions of R older than
  # 3.5.0 because they don't include this change:
  # https://github.com/wch/r-source/commit/688eaebf. So with
  # `tryCatch(warning = )`, the original warning `cnd` will be caught
  # instead of the instrumented warning.
  on.exit(
    if (can_return_from_exit && early_exit) {
      return(FALSE)
    }
  )

  muffled <- withRestarts(
    muffleWarning = function(...) TRUE,
    {
      signalCondition(cnd)
      FALSE
    }
  )

  # Reaching this line means no early exit happened
  early_exit <- FALSE

  muffled
}

# Whether `on.exit()` expressions may `return()` (see comment above)
on_load(
  can_return_from_exit <- getRversion() >= "3.5.0"
)
dplyr/NEWS.md0000644000176200001440000041234514525503021012510 0ustar liggesusers# dplyr 1.1.4

* `join_by()` now allows its helper functions to be namespaced with
  `dplyr::`, like `join_by(dplyr::between(x, lower, upper))` (#6838).

* `left_join()` and friends now return a specialized error message if they
  detect that your join would return more rows than dplyr can handle (#6912).

* `slice_*()` now throw the correct error if you forget to name `n` while
  also prefixing the call with `dplyr::` (#6946).

* `dplyr_reconstruct()`'s default method has been rewritten to avoid
  materializing duckplyr queries too early (#6947).

* Updated the `storms` data to include 2022 data (#6937, @steveharoz).

* Updated the `starwars` data to use a new API, because the old one is
  defunct. There are very minor changes to the data itself (#6938,
  @steveharoz).

# dplyr 1.1.3

* `mutate_each()` and `summarise_each()` now throw correct deprecation
  messages (#6869).

* `setequal()` now requires the input data frames to be compatible, similar
  to the other set methods like `setdiff()` or `intersect()` (#6786).
# dplyr 1.1.2

* `count()` better documents that it has a `.drop` argument (#6820).

* Fixed tests to maintain compatibility with the next version of waldo (#6823).

* Joins better handle key columns with all `NA`s (#6804).

# dplyr 1.1.1

* Mutating joins now warn about multiple matches much less often. At a high
  level, a warning was previously being thrown when a one-to-many or
  many-to-many relationship was detected between the keys of `x` and `y`, but
  is now only thrown for a many-to-many relationship, which is much rarer and
  much more dangerous than one-to-many because it can result in a Cartesian
  explosion in the number of rows returned from the join (#6731, #6717).

  We've accomplished this in two steps:

  * `multiple` now defaults to `"all"`, and the options of `"error"` and
    `"warning"` are now deprecated in favor of using `relationship` (see
    below). We are using an accelerated deprecation process for these two
    options because they've only been available for a few weeks, and
    `relationship` is a clearly superior alternative.

  * The mutating joins gain a new `relationship` argument, allowing you to
    optionally enforce one of the following relationship constraints between
    the keys of `x` and `y`: `"one-to-one"`, `"one-to-many"`, `"many-to-one"`,
    or `"many-to-many"`.

    For example, `"many-to-one"` enforces that each row in `x` can match at
    most 1 row in `y`. If a row in `x` matches >1 rows in `y`, an error is
    thrown. This option serves as the replacement for `multiple = "error"`.

  The default behavior of `relationship` doesn't assume that there is any
  relationship between `x` and `y`. However, for equality joins it will check
  for the presence of a many-to-many relationship, and will warn if it
  detects one.
This change unfortunately does mean that if you have set `multiple = "all"` to avoid a warning and you happened to be doing a many-to-many style join, then you will need to replace `multiple = "all"` with `relationship = "many-to-many"` to silence the new warning, but we believe this should be rare since many-to-many relationships are fairly uncommon. * Fixed a major performance regression in `case_when()`. It is still a little slower than in dplyr 1.0.10, but we plan to improve this further in the future (#6674). * Fixed a performance regression related to `nth()`, `first()`, and `last()` (#6682). * Fixed an issue where expressions involving infix operators had an abnormally large amount of overhead (#6681). * `group_data()` on ungrouped data frames is faster (#6736). * `n()` is a little faster when there are many groups (#6727). * `pick()` now returns a 1 row, 0 column tibble when `...` evaluates to an empty selection. This makes it more compatible with [tidyverse recycling rules](https://vctrs.r-lib.org/reference/theory-faq-recycling.html) in some edge cases (#6685). * `if_else()` and `case_when()` again accept logical conditions that have attributes (#6678). * `arrange()` can once again sort the `numeric_version` type from base R (#6680). * `slice_sample()` now works when the input has a column named `replace`. `slice_min()` and `slice_max()` now work when the input has columns named `na_rm` or `with_ties` (#6725). * `nth()` now errors informatively if `n` is `NA` (#6682). * Joins now throw a more informative error when `y` doesn't have the same source as `x` (#6798). * All major dplyr verbs now throw an informative error message if the input data frame contains a column named `NA` or `""` (#6758). * Deprecation warnings thrown by `filter()` now mention the correct package where the problem originated from (#6679). * Fixed an issue where using `<-` within a grouped `mutate()` or `summarise()` could cross contaminate other groups (#6666). 
* The compatibility vignette has been replaced with a more general vignette on using dplyr in packages, `vignette("in-packages")` (#6702). * The developer documentation in `?dplyr_extending` has been refreshed and brought up to date with all changes made in 1.1.0 (#6695). * `rename_with()` now includes an example of using `paste0(recycle0 = TRUE)` to correctly handle empty selections (#6688). * R >=3.5.0 is now explicitly required. This is in line with the tidyverse policy of supporting the [5 most recent versions of R](https://www.tidyverse.org/blog/2019/04/r-version-support/). # dplyr 1.1.0 ## New features * [`.by`/`by`](https://dplyr.tidyverse.org/reference/dplyr_by.html) is an experimental alternative to `group_by()` that supports per-operation grouping for `mutate()`, `summarise()`, `filter()`, and the `slice()` family (#6528). Rather than: ``` starwars %>% group_by(species, homeworld) %>% summarise(mean_height = mean(height)) ``` You can now write: ``` starwars %>% summarise( mean_height = mean(height), .by = c(species, homeworld) ) ``` The most useful reason to do this is because `.by` only affects a single operation. In the example above, an ungrouped data frame went into the `summarise()` call, so an ungrouped data frame will come out; with `.by`, you never need to remember to `ungroup()` afterwards and you never need to use the `.groups` argument. Additionally, using `summarise()` with `.by` will never sort the results by the group key, unlike with `group_by()`. Instead, the results are returned using the existing ordering of the groups from the original data. We feel this is more predictable, better maintains any ordering you might have already applied with a previous call to `arrange()`, and provides a way to maintain the current ordering without having to resort to factors. 
This feature was inspired by [data.table](https://CRAN.R-project.org/package=data.table), where the equivalent syntax looks like: ``` starwars[, .(mean_height = mean(height)), by = .(species, homeworld)] ``` `with_groups()` is superseded in favor of `.by` (#6582). * `reframe()` is a new experimental verb that creates a new data frame by applying functions to columns of an existing data frame. It is very similar to `summarise()`, with two big differences: * `reframe()` can return an arbitrary number of rows per group, while `summarise()` reduces each group down to a single row. * `reframe()` always returns an ungrouped data frame, while `summarise()` might return a grouped or rowwise data frame, depending on the scenario. `reframe()` has been added in response to valid concern from the community that allowing `summarise()` to return any number of rows per group increases the chance for accidental bugs. We still feel that this is a powerful technique, and is a principled replacement for `do()`, so we have moved these features to `reframe()` (#6382). * `group_by()` now uses a new algorithm for computing groups. It is often faster than the previous approach (especially when there are many groups), and in most cases there should be no changes. The one exception is with character vectors, see the C locale news bullet below for more details (#4406, #6297). * `arrange()` now uses a faster algorithm for sorting character vectors, which is heavily inspired by data.table's `forder()`. See the C locale news bullet below for more details (#4962). * Joins have been completely overhauled to enable more flexible join operations and provide more tools for quality control. Many of these changes are inspired by data.table's join syntax (#5914, #5661, #5413, #2240). * A _join specification_ can now be created through `join_by()`. This allows you to specify both the left and right hand side of a join using unquoted column names, such as `join_by(sale_date == commercial_date)`. 
Join specifications can be supplied to any `*_join()` function as the `by` argument. * Join specifications allow for new types of joins: * Equality joins: The most common join, specified by `==`. For example, `join_by(sale_date == commercial_date)`. * Inequality joins: For joining on inequalities, i.e.`>=`, `>`, `<`, and `<=`. For example, use `join_by(sale_date >= commercial_date)` to find every commercial that aired before a particular sale. * Rolling joins: For "rolling" the closest match forward or backwards when there isn't an exact match, specified by using the rolling helper, `closest()`. For example, `join_by(closest(sale_date >= commercial_date))` to find only the most recent commercial that aired before a particular sale. * Overlap joins: For detecting overlaps between sets of columns, specified by using one of the overlap helpers: `between()`, `within()`, or `overlaps()`. For example, use `join_by(between(commercial_date, sale_date_lower, sale_date))` to find commercials that aired before a particular sale, as long as they occurred after some lower bound, such as 40 days before the sale was made. Note that you cannot use arbitrary expressions in the join conditions, like `join_by(sale_date - 40 >= commercial_date)`. Instead, use `mutate()` to create a new column containing the result of `sale_date - 40` and refer to that by name in `join_by()`. * `multiple` is a new argument for controlling what happens when a row in `x` matches multiple rows in `y`. For equality joins and rolling joins, where this is usually surprising, this defaults to signalling a `"warning"`, but still returns all of the matches. For inequality joins, where multiple matches are usually expected, this defaults to returning `"all"` of the matches. You can also return only the `"first"` or `"last"` match, `"any"` of the matches, or you can `"error"`. * `keep` now defaults to `NULL` rather than `FALSE`. 
`NULL` implies `keep = FALSE` for equality conditions, but `keep = TRUE` for inequality conditions, since you generally want to preserve both sides of an inequality join. * `unmatched` is a new argument for controlling what happens when a row would be dropped because it doesn't have a match. For backwards compatibility, the default is `"drop"`, but you can also choose to `"error"` if dropped rows would be surprising. * `across()` gains an experimental `.unpack` argument to optionally unpack (as in, `tidyr::unpack()`) data frames returned by functions in `.fns` (#6360). * `consecutive_id()` for creating groups based on contiguous runs of the same values, like `data.table::rleid()` (#1534). * `case_match()` is a "vectorised switch" variant of `case_when()` that matches on values rather than logical expressions. It is like a SQL "simple" `CASE WHEN` statement, whereas `case_when()` is like a SQL "searched" `CASE WHEN` statement (#6328). * `cross_join()` is a more explicit and slightly more correct replacement for using `by = character()` during a join (#6604). * `pick()` makes it easy to access a subset of columns from the current group. `pick()` is intended as a replacement for `across(.fns = NULL)`, `cur_data()`, and `cur_data_all()`. We feel that `pick()` is a much more evocative name when you are just trying to select a subset of columns from your data (#6204). * `symdiff()` computes the symmetric difference (#4811). ## Lifecycle changes ### Breaking changes * `arrange()` and `group_by()` now use the C locale, not the system locale, when ordering or grouping character vectors. This brings _substantial_ performance improvements, increases reproducibility across R sessions, makes dplyr more consistent with data.table, and we believe it should affect little existing code. If it does affect your code, you can use `options(dplyr.legacy_locale = TRUE)` to quickly revert to the previous behavior. 
However, in general, we instead recommend that you use the new `.locale` argument to precisely specify the desired locale. For a full explanation please read the associated [grouping](https://github.com/tidyverse/tidyups/blob/main/006-dplyr-group-by-ordering.md) and [ordering](https://github.com/tidyverse/tidyups/blob/main/003-dplyr-radix-ordering.md) tidyups. * `bench_tbls()`, `compare_tbls()`, `compare_tbls2()`, `eval_tbls()`, `eval_tbls2()`, `location()` and `changes()`, deprecated in 1.0.0, are now defunct (#6387). * `frame_data()`, `data_frame_()`, `lst_()` and `tbl_sum()` are no longer re-exported from tibble (#6276, #6277, #6278, #6284). * `select_vars()`, `rename_vars()`, `select_var()` and `current_vars()`, deprecated in 0.8.4, are now defunct (#6387). ### Newly deprecated * `across()`, `c_across()`, `if_any()`, and `if_all()` now require the `.cols` and `.fns` arguments. In general, we now recommend that you use `pick()` instead of an empty `across()` call or `across()` with no `.fns` (e.g. `across(c(x, y))`. (#6523). * Relying on the previous default of `.cols = everything()` is deprecated. We have skipped the soft-deprecation stage in this case, because indirect usage of `across()` and friends in this way is rare. * Relying on the previous default of `.fns = NULL` is not yet formally soft-deprecated, because there was no good alternative until now, but it is discouraged and will be soft-deprecated in the next minor release. * Passing `...` to `across()` is soft-deprecated because it's ambiguous when those arguments are evaluated. Now, instead of (e.g.) `across(a:b, mean, na.rm = TRUE)` you should write `across(a:b, ~ mean(.x, na.rm = TRUE))` (#6073). * `all_equal()` is deprecated. We've advised against it for some time, and we explicitly recommend you use `all.equal()`, manually reordering the rows and columns as needed (#6324). * `cur_data()` and `cur_data_all()` are soft-deprecated in favour of `pick()` (#6204). 
* Using `by = character()` to perform a cross join is now soft-deprecated in favor of `cross_join()` (#6604). * `filter()`ing with a 1-column matrix is deprecated (#6091). * `progress_estimate()` is deprecated for all uses (#6387). * Using `summarise()` to produce a 0 or >1 row "summary" is deprecated in favor of the new `reframe()`. See the NEWS bullet about `reframe()` for more details (#6382). * All functions deprecated in 1.0.0 (released April 2020) and earlier now warn every time you use them (#6387). This includes `combine()`, `src_local()`, `src_mysql()`, `src_postgres()`, `src_sqlite()`, `rename_vars_()`, `select_vars_()`, `summarise_each_()`, `mutate_each_()`, `as.tbl()`, `tbl_df()`, and a handful of older arguments. They are likely to be made defunct in the next major version (but not before mid 2024). * `slice()`ing with a 1-column matrix is deprecated. ### Newly superseded * `recode()` is superseded in favour of `case_match()` (#6433). * `recode_factor()` is superseded. We don't have a direct replacement for it yet, but we plan to add one to forcats. In the meantime you can often use `case_match(.ptype = factor(levels = ))` instead (#6433). * `transmute()` is superseded in favour of `mutate(.keep = "none")` (#6414). ### Newly stable * The `.keep`, `.before`, and `.after` arguments to `mutate()` have moved from experimental to stable. * The `rows_*()` family of functions have moved from experimental to stable. ## vctrs Many of dplyr's vector functions have been rewritten to make use of the vctrs package, bringing greater consistency and improved performance. * `between()` can now work with all vector types, not just numeric and date-time. Additionally, `left` and `right` can now also be vectors (with the same length as `x`), and `x`, `left`, and `right` are cast to the common type before the comparison is made (#6183, #6260, #6478). 
* `case_when()` (#5106): * Has a new `.default` argument that is intended to replace usage of `TRUE ~ default_value` as a more explicit and readable way to specify a default value. In the future, we will deprecate the unsafe recycling of the LHS inputs that allows `TRUE ~` to work, so we encourage you to switch to using `.default`. * No longer requires exact matching of the types of RHS values. For example, the following no longer requires you to use `NA_character_`. ``` x <- c("little", "unknown", "small", "missing", "large") case_when( x %in% c("little", "small") ~ "one", x %in% c("big", "large") ~ "two", x %in% c("missing", "unknown") ~ NA ) ``` * Supports a larger variety of RHS value types. For example, you can use a data frame to create multiple columns at once. * Has new `.ptype` and `.size` arguments which allow you to enforce a particular output type and size. * Has a better error when types or lengths were incompatible (#6261, #6206). * `coalesce()` (#6265): * Discards `NULL` inputs up front. * No longer iterates over the columns of data frame input. Instead, a row is now only coalesced if it is entirely missing, which is consistent with `vctrs::vec_detect_missing()` and greatly simplifies the implementation. * Has new `.ptype` and `.size` arguments which allow you to enforce a particular output type and size. * `first()`, `last()`, and `nth()` (#6331): * When used on a data frame, these functions now return a single row rather than a single column. This is more consistent with the vctrs principle that a data frame is generally treated as a vector of rows. * The `default` is no longer "guessed", and will always automatically be set to a missing value appropriate for the type of `x`. * Error if `n` is not an integer. `nth(x, n = 2)` is fine, but `nth(x, n = 2.5)` is now an error. * No longer support indexing into scalar objects, like `` or scalar S4 objects (#6670). 
Additionally, they have all gained an `na_rm` argument since they are summary functions (#6242, with contributions from @tnederlof). * `if_else()` gains most of the same benefits as `case_when()`. In particular, `if_else()` now takes the common type of `true`, `false`, and `missing` to determine the output type, meaning that you can now reliably use `NA`, rather than `NA_character_` and friends (#6243). `if_else()` also no longer allows you to supply `NULL` for either `true` or `false`, which was an undocumented usage that we consider to be off-label, because `true` and `false` are intended to be (and documented to be) vector inputs (#6730). * `na_if()` (#6329) now casts `y` to the type of `x` before comparison, which makes it clearer that this function is type and size stable on `x`. In particular, this means that you can no longer do `na_if(, 0)`, which previously accidentally allowed you to replace any instance of `0` across every column of the tibble with `NA`. `na_if()` was never intended to work this way, and this is considered off-label usage. You can also now replace `NaN` values in `x` with `na_if(x, NaN)`. * `lag()` and `lead()` now cast `default` to the type of `x`, rather than taking the common type. This ensures that these functions are type stable on `x` (#6330). * `row_number()`, `min_rank()`, `dense_rank()`, `ntile()`, `cume_dist()`, and `percent_rank()` are faster and work for more types. You can now rank by multiple columns by supplying a data frame (#6428). * `with_order()` now checks that the size of `order_by` is the same size as `x`, and now works correctly when `order_by` is a data frame (#6334). ## Minor improvements and bug fixes * Fixed an issue with latest rlang that caused internal tools (such as `mask$eval_all_summarise()`) to be mentioned in error messages (#6308). * Warnings are enriched with contextualised information in `summarise()` and `filter()` just like they have been in `mutate()` and `arrange()`. 
* Joins now reference the correct column in `y` when a type error is thrown while joining on two columns with different names (#6465). * Joins on very wide tables are no longer bottlenecked by the application of `suffix` (#6642). * `*_join()` now error if you supply them with additional arguments that aren't used (#6228). * `across()` used without functions inside a rowwise-data frame no longer generates an invalid data frame (#6264). * Anonymous functions supplied with `function()` and `\()` are now inlined by `across()` if possible, which slightly improves performance and makes possible further optimisations in the future. * Functions supplied to `across()` are no longer masked by columns (#6545). For instance, `across(1:2, mean)` will now work as expected even if there is a column called `mean`. * `across()` will now error when supplied `...` without a `.fns` argument (#6638). * `arrange()` now correctly ignores `NULL` inputs (#6193). * `arrange()` now works correctly when `across()` calls are used as the 2nd (or more) ordering expression (#6495). * `arrange(df, mydesc::desc(x))` works correctly when mydesc re-exports `dplyr::desc()` (#6231). * `c_across()` now evaluates `all_of()` correctly and no longer allows you to accidentally select grouping variables (#6522). * `c_across()` now throws a more informative error if you try to rename during column selection (#6522). * dplyr no longer provides `count()` and `tally()` methods for `tbl_sql`. These methods have been accidentally overriding the `tbl_lazy` methods that dbplyr provides, which has resulted in issues with the grouping structure of the output (#6338, tidyverse/dbplyr#940). * `cur_group()` now works correctly with zero row grouped data frames (#6304). * `desc()` gives a useful error message if you give it a non-vector (#6028). * `distinct()` now retains attributes of bare data frames (#6318). * `distinct()` returns columns ordered the way you request, not the same as the input data (#6156). 
* Error messages in `group_by()`, `distinct()`, `tally()`, and `count()` are now more relevant (#6139). * `group_by_prepare()` loses the `caller_env` argument. It was rarely used and it is no longer needed (#6444). * `group_walk()` gains an explicit `.keep` argument (#6530). * Warnings emitted inside `mutate()` and variants are now collected and stashed away. Run the new `last_dplyr_warnings()` function to see the warnings emitted within dplyr verbs during the last top-level command. This fixes performance issues when thousands of warnings are emitted with rowwise and grouped data frames (#6005, #6236). * `mutate()` behaves a little better with 0-row rowwise inputs (#6303). * A rowwise `mutate()` now automatically unlists list-columns containing length 1 vectors (#6302). * `nest_join()` has gained the `na_matches` argument that all other joins have. * `nest_join()` now preserves the type of `y` (#6295). * `n_distinct()` now errors if you don't give it any input (#6535). * `nth()`, `first()`, `last()`, and `with_order()` now sort character `order_by` vectors in the C locale. Using character vectors for `order_by` is rare, so we expect this to have little practical impact (#6451). * `ntile()` now requires `n` to be a single positive integer. * `relocate()` now works correctly with empty data frames and when `.before` or `.after` result in empty selections (#6167). * `relocate()` no longer drops attributes of bare data frames (#6341). * `relocate()` now retains the last name change when a single column is renamed multiple times while it is being moved. This better matches the behavior of `rename()` (#6209, with help from @eutwt). * `rename()` now contains examples of using `all_of()` and `any_of()` to rename using a named character vector (#6644). * `rename_with()` now disallows renaming in the `.cols` tidy-selection (#6561). * `rename_with()` now checks that the result of `.fn` is the right type and size (#6561). 
* `rows_insert()` now checks that `y` contains the `by` columns (#6652). * `setequal()` ignores differences between freely coercible types (e.g. integer and double) (#6114) and ignores duplicated rows (#6057). * `slice()` helpers again produce output equivalent to `slice(.data, 0)` when the `n` or `prop` argument is 0, fixing a bug introduced in the previous version (@eutwt, #6184). * `slice()` with no inputs now returns 0 rows. This is mostly for theoretical consistency (#6573). * `slice()` now errors if any expressions in `...` are named. This helps avoid accidentally misspelling an optional argument, such as `.by` (#6554). * `slice_*()` now requires `n` to be an integer. * `slice_*()` generics now perform argument validation. This should make methods more consistent and simpler to implement (#6361). * `slice_min()` and `slice_max()` can `order_by` multiple variables if you supply them as a data.frame or tibble (#6176). * `slice_min()` and `slice_max()` now consistently include missing values in the result if necessary (i.e. there aren't enough non-missing values to reach the `n` or `prop` you have selected). If you don't want missing values to be included at all, set `na_rm = TRUE` (#6177). * `slice_sample()` now accepts negative `n` and `prop` values (#6402). * `slice_sample()` returns a data frame or group with the same number of rows as the input when `replace = FALSE` and `n` is larger than the number of rows or `prop` is larger than 1. This reverts a change made in 1.0.8, returning to the behavior of 1.0.7 (#6185) * `slice_sample()` now gives a more informative error when `replace = FALSE` and the number of rows requested in the sample exceeds the number of rows in the data (#6271). * `storms` has been updated to include 2021 data and some missing storms that were omitted due to an error (@steveharoz, #6320). * `summarise()` now correctly recycles named 0-column data frames (#6509). 
* `union_all()`, like `union()`, now requires that data frames be compatible: i.e. they have the same columns, and the columns have compatible types. * `where()` is re-exported from tidyselect (#6597). # dplyr 1.0.10 Hot patch release to resolve R CMD check failures. # dplyr 1.0.9 * New `rows_append()` which works like `rows_insert()` but ignores keys and allows you to insert arbitrary rows with a guarantee that the type of `x` won't change (#6249, thanks to @krlmlr for the implementation and @mgirlich for the idea). * The `rows_*()` functions no longer require that the key values in `x` uniquely identify each row. Additionally, `rows_insert()` and `rows_delete()` no longer require that the key values in `y` uniquely identify each row. Relaxing this restriction should make these functions more practically useful for data frames, and alternative backends can enforce this in other ways as needed (i.e. through primary keys) (#5553). * `rows_insert()` gained a new `conflict` argument allowing you greater control over rows in `y` with keys that conflict with keys in `x`. A conflict arises if a key in `y` already exists in `x`. By default, a conflict results in an error, but you can now also `"ignore"` these `y` rows. This is very similar to the `ON CONFLICT DO NOTHING` command from SQL (#5588, with helpful additions from @mgirlich and @krlmlr). * `rows_update()`, `rows_patch()`, and `rows_delete()` gained a new `unmatched` argument allowing you greater control over rows in `y` with keys that are unmatched by the keys in `x`. By default, an unmatched key results in an error, but you can now also `"ignore"` these `y` rows (#5984, #5699). * `rows_delete()` no longer requires that the columns of `y` be a strict subset of `x`. Only the columns specified through `by` will be utilized from `y`, all others will be dropped with a message. * The `rows_*()` functions now always retain the column types of `x`. 
This behavior was documented, but previously wasn't being applied correctly (#6240). * The `rows_*()` functions now fail elegantly if `y` is a zero column data frame and `by` isn't specified (#6179). # dplyr 1.0.8 * Better display of error messages thanks to rlang 1.0.0. * `mutate(.keep = "none")` is no longer identical to `transmute()`. `transmute()` has not been changed, and completely ignores the column ordering of the existing data, instead relying on the ordering of expressions supplied through `...`. `mutate(.keep = "none")` has been changed to ensure that pre-existing columns are never moved, which aligns more closely with the other `.keep` options (#6086). * `filter()` forbids matrix results (#5973) and warns about data frame results, especially data frames created from `across()` with a hint to use `if_any()` or `if_all()`. * `slice()` helpers (`slice_head()`, `slice_tail()`, `slice_min()`, `slice_max()`) now accept negative values for `n` and `prop` (#5961). * `slice()` now indicates which group produces an error (#5931). * `cur_data()` and `cur_data_all()` don't simplify list columns in rowwise data frames (#5901). * dplyr now uses `rlang::check_installed()` to prompt you whether to install required packages that are missing. * `storms` data updated to 2020 (@steveharoz, #5899). * `coalesce()` accepts 1-D arrays (#5557). * The deprecated `trunc_mat()` is no longer reexported from dplyr (#6141). # dplyr 1.0.7 * `across()` uses the formula environment when inlining them (#5886). * `summarise.rowwise_df()` is quiet when the result is ungrouped (#5875). * `c_across()` and `across()` key deparsing not confused by long calls (#5883). * `across()` handles named selections (#5207). # dplyr 1.0.6 * `add_count()` is now generic (#5837). * `if_any()` and `if_all()` abort when a predicate is mistakenly used as `.cols=` (#5732). * Multiple calls to `if_any()` and/or `if_all()` in the same expression are now properly disambiguated (#5782). 
* `filter()` now inlines `if_any()` and `if_all()` expressions. This greatly improves performance with grouped data frames. * Fixed behaviour of `...` in top-level `across()` calls (#5813, #5832). * `across()` now inlines lambda-formulas. This is slightly more performant and will allow more optimisations in the future. * Fixed issue in `bind_rows()` causing lists to be incorrectly transformed as data frames (#5417, #5749). * `select()` no longer creates duplicate variables when renaming a variable to the same name as a grouping variable (#5841). * `dplyr_col_select()` keeps attributes for bare data frames (#5294, #5831). * Fixed quosure handling in `dplyr::group_by()` that caused issues with extra arguments (tidyverse/lubridate#959). * Removed the `name` argument from the `compute()` generic (@ianmcook, #5783). * row-wise data frames of 0 rows and list columns are supported again (#5804). # dplyr 1.0.5 * Fixed edge case of `slice_sample()` when `weight_by=` is used and there are 0 rows (#5729). * `across()` can again use columns in functions defined inline (#5734). * Using testthat 3rd edition. * Fixed bugs introduced in `across()` in previous version (#5765). * `group_by()` keeps attributes unrelated to the grouping (#5760). * The `.cols=` argument of `if_any()` and `if_all()` defaults to `everything()`. # dplyr 1.0.4 * Improved performance for `across()`. This makes `summarise(across())` and `mutate(across())` perform as well as the superseded colwise equivalents (#5697). * New functions `if_any()` and `if_all()` (#4770, #5713). * `summarise()` silently ignores NULL results (#5708). * Fixed a performance regression in `mutate()` when warnings occur once per group (#5675). We no longer instrument warnings with debugging information when `mutate()` is called within `suppressWarnings()`. # dplyr 1.0.3 * `summarise()` no longer informs when the result is ungrouped (#5633). * `group_by(.drop = FALSE)` preserves ordered factors (@brianrice2, #5545). 
* `count()` and `tally()` are now generic. * Removed default fallbacks to lazyeval methods; this will yield better error messages when you call a dplyr function with the wrong input, and is part of our long term plan to remove the deprecated lazyeval interface. * `inner_join()` gains a `keep` parameter for consistency with the other mutating joins (@patrickbarks, #5581). * Improved performance with many columns, with a dynamic data mask using active bindings and lazy chops (#5017). * `mutate()` and friends preserve row names in data frames once more (#5418). * `group_by()` uses the ungrouped data for the implicit mutate step (#5598). You might have to define an `ungroup()` method for custom classes. For example, see https://github.com/hadley/cubelyr/pull/3. * `relocate()` can rename columns it relocates (#5569). * `distinct()` and `group_by()` have better error messages when the mutate step fails (#5060). * Clarify that `between()` is not vectorised (#5493). * Fixed `across()` issue where data frame columns could not be referred to with `all_of()` in the nested case (`mutate()` within `mutate()`) (#5498). * `across()` handles data frames with 0 columns (#5523). * `mutate()` always keeps grouping variables, regardless of `.keep=` (#5582). * dplyr now depends on R 3.3.0. # dplyr 1.0.2 * Fixed `across()` issue where data frame columns would mask objects referred to from `all_of()` (#5460). * `bind_cols()` gains a `.name_repair` argument, passed to `vctrs::vec_cbind()` (#5451). * `summarise(.groups = "rowwise")` makes a rowwise data frame even if the input data is not grouped (#5422). # dplyr 1.0.1 * New function `cur_data_all()` similar to `cur_data()` but includes the grouping variables (#5342). * `count()` and `tally()` no longer automatically weight by column `n` if present (#5298). dplyr 1.0.0 introduced this behaviour because of Hadley's faulty memory. 
Historically `tally()` automatically weighted and `count()` did not, but this behaviour was accidentally changed in 0.8.2 (#4408) so that neither automatically weighted by `n`. Since 0.8.2 is almost a year old, and the automatically weighting behaviour was a little confusing anyway, we've removed it from both `count()` and `tally()`. Use of `wt = n()` is now deprecated; now just omit the `wt` argument. * `coalesce()` now supports data frames correctly (#5326). * `cummean()` no longer has off-by-one indexing problem (@cropgen, #5287). * The call stack is preserved on error. This makes it possible to `recover()` into problematic code called from dplyr verbs (#5308). # dplyr 1.0.0 ## Breaking changes * `bind_cols()` no longer converts to a tibble, returns a data frame if the input is a data frame. * `bind_rows()`, `*_join()`, `summarise()` and `mutate()` use vctrs coercion rules. There are two main user facing changes: * Combining factor and character vectors silently creates a character vector; previously it created a character vector with a warning. * Combining multiple factors creates a factor with combined levels; previously it created a character vector with a warning. * `bind_rows()` and other functions use vctrs name repair, see `?vctrs::vec_as_names`. * `all.equal.tbl_df()` removed. * Data frames, tibbles and grouped data frames are no longer considered equal, even if the data is the same. * Equality checks for data frames no longer ignore row order or groupings. * `expect_equal()` uses `all.equal()` internally. When comparing data frames, tests that used to pass may now fail. * `distinct()` keeps the original column order. * `distinct()` on missing columns now raises an error, it has been a compatibility warning for a long time. * `group_modify()` puts the grouping variable to the front. * `n()` and `row_number()` can no longer be called directly when dplyr is not loaded, and this now generates an error: `dplyr::mutate(mtcars, x = n())`. 
Fix by prefixing with `dplyr::` as in `dplyr::mutate(mtcars, x = dplyr::n())`. * The old data format for `grouped_df` is no longer supported. This may affect you if you have serialized grouped data frames to disk, e.g. with `saveRDS()` or when using knitr caching. * `lead()` and `lag()` are stricter about their inputs. * Extending data frames requires that the extra class or classes are added first, not last. Having the extra class at the end causes some vctrs operations to fail with a message like: ``` Input must be a vector, not a `` object ``` * `right_join()` no longer sorts the rows of the resulting tibble according to the order of the RHS `by` argument in tibble `y`. ## New features * The `cur_` functions (`cur_data()`, `cur_group()`, `cur_group_id()`, `cur_group_rows()`) provide a full set of options for you to access information about the "current" group in dplyr verbs. They are inspired by data.table's `.SD`, `.GRP`, `.BY`, and `.I`. * The `rows_` functions (`rows_insert()`, `rows_update()`, `rows_upsert()`, `rows_patch()`, `rows_delete()`) provide a new API to insert and delete rows from a second data frame or table. Support for updating mutable backends is planned (#4654). * `mutate()` and `summarise()` create multiple columns from a single expression if you return a data frame (#2326). * `select()` and `rename()` use the latest version of the tidyselect interface. Practically, this means that you can now combine selections using Boolean logic (i.e. `!`, `&` and `|`), and use predicate functions with `where()` (e.g. `where(is.character)`) to select variables by type (#4680). It also makes it possible to use `select()` and `rename()` to repair data frames with duplicated names (#4615) and prevents you from accidentally introducing duplicate names (#4643). This also means that dplyr now re-exports `any_of()` and `all_of()` (#5036). 
* `slice()` gains a new set of helpers: * `slice_head()` and `slice_tail()` select the first and last rows, like `head()` and `tail()`, but return `n` rows _per group_. * `slice_sample()` randomly selects rows, taking over from `sample_frac()` and `sample_n()`. * `slice_min()` and `slice_max()` select the rows with the minimum or maximum values of a variable, taking over from the confusing `top_n()`. * `summarise()` can create summaries of greater than length 1 if you use a summary function that returns multiple values. * `summarise()` gains a `.groups=` argument to control the grouping structure. * New `relocate()` verb makes it easy to move columns around within a data frame (#4598). * New `rename_with()` is designed specifically for the purpose of renaming selected columns with a function (#4771). * `ungroup()` can now selectively remove grouping variables (#3760). * `pull()` can now return named vectors by specifying an additional column name (@ilarischeinin, #4102). ## Experimental features * `mutate()` (for data frames only), gains experimental new arguments `.before` and `.after` that allow you to control where the new columns are placed (#2047). * `mutate()` (for data frames only), gains an experimental new argument called `.keep` that allows you to control which variables are kept from the input `.data`. `.keep = "all"` is the default; it keeps all variables. `.keep = "none"` retains no input variables (except for grouping keys), so behaves like `transmute()`. `.keep = "unused"` keeps only variables not used to make new columns. `.keep = "used"` keeps only the input variables used to create new columns; it's useful for double checking your work (#3721). * New, experimental, `with_groups()` makes it easy to temporarily group or ungroup (#4711). ## across() * New function `across()` that can be used inside `summarise()`, `mutate()`, and other verbs to apply a function (or a set of functions) to a selection of columns. 
See `vignette("colwise")` for more details. * New function `c_across()` that can be used inside `summarise()` and `mutate()` in row-wise data frames to easily (e.g.) compute a row-wise mean of all numeric variables. See `vignette("rowwise")` for more details. ## rowwise() * `rowwise()` is no longer questioning; we now understand that it's an important tool when you don't have vectorised code. It now also allows you to specify additional variables that should be preserved in the output when summarising (#4723). The rowwise-ness is preserved by all operations; you need to explicitly drop it with `as_tibble()` or `group_by()`. * New, experimental, `nest_by()`. It has the same interface as `group_by()`, but returns a rowwise data frame of grouping keys, supplemented with a list-column of data frames containing the rest of the data. ## vctrs * The implementation of all dplyr verbs has been changed to use primitives provided by the vctrs package. This makes it easier to add support for new types of vector, radically simplifies the implementation, and makes all dplyr verbs more consistent. * The place where you are most likely to be impacted by the coercion changes is when working with factors in joins or grouped mutates: now when combining factors with different levels, dplyr creates a new factor with the union of the levels. This matches base R more closely, and while perhaps strictly less correct, is much more convenient. * dplyr dropped its two heaviest dependencies: Rcpp and BH. This should make it considerably easier and faster to build from source. * The implementation of all verbs has been carefully thought through. This mostly makes implementation simpler but should hopefully increase consistency, and also makes it easier to adapt dplyr to new data structures in the near future. Pragmatically, the biggest difference for most people will be that each verb documents its return value in terms of rows, columns, groups, and data frame attributes. 
* Row names are now preserved when working with data frames. ## Grouping * `group_by()` uses hashing from the `vctrs` package. * Grouped data frames now have `names<-`, `[[<-`, `[<-` and `$<-` methods that re-generate the underlying grouping. Note that modifying grouping variables in multiple steps (i.e. `df$grp1 <- 1; df$grp2 <- 1`) will be inefficient since the data frame will be regrouped after each modification. * `[.grouped_df` now regroups to respect any grouping columns that have been removed (#4708). * `mutate()` and `summarise()` can now modify grouping variables (#4709). * `group_modify()` works with additional arguments (@billdenney and @cderv, #4509). * `group_by()` does not create an arbitrary NA group when grouping by factors with `drop = TRUE` (#4460). ## Lifecycle changes * All deprecations now use the [lifecycle package](https://lifecycle.r-lib.org), which means that by default you'll only see a deprecation warning once per session, and you can control it with `options(lifecycle_verbosity = x)` where `x` is one of NULL, "quiet", "warning", and "error". ### Removed * `id()`, deprecated in dplyr 0.5.0, is now defunct. * `failwith()`, deprecated in dplyr 0.7.0, is now defunct. * `tbl_cube()` and `nasa` have been pulled out into a separate cubelyr package (#4429). * `rbind_all()` and `rbind_list()` have been removed (@bjungbogati, #4430). * `dr_dplyr()` has been removed as it is no longer needed (#4433, @smwindecker). 
These were only used in a handful of packages, and we now believe that you're better off performing comparisons more directly (#4675). * `combine()`: please use `vctrs::vec_c()` instead. * `funs()`: please use `list()` instead. * `group_by(add = )`: please use `.add` instead. * `group_by(.dots = )`/`group_by_prepare(.dots = )`: please use `!!!` instead (#4734). * The use of zero-arg `group_indices()` to retrieve the group id for the "current" group is deprecated; instead use `cur_group_id()`. * Passing arguments to `group_keys()` or `group_indices()` to change the grouping has been deprecated, instead do grouping first yourself. * `location()` and `changes()`: please use `lobstr::ref()` instead. * `progress_estimated()` is soft deprecated; it's not the responsibility of dplyr to provide progress bars (#4935). * `src_local()` has been deprecated; it was part of an approach to testing dplyr backends that didn't pan out. * `src_mysql()`, `src_postgres()`, and `src_sqlite()` have been deprecated. We've recommended against them for some time. Instead please use the approach described at <https://db.rstudio.com>. * `select_vars()`, `rename_vars()`, `select_var()`, `current_vars()` are now deprecated (@perezp44, #4432). ### Superseded * The scoped helpers (all functions ending in `_if`, `_at`, or `_all`) have been superseded by `across()`. This dramatically reduces the API surface for dplyr, while at the same time providing a more flexible and less error-prone interface (#4769). `rename_*()` and `select_*()` have been superseded by `rename_with()`. * `do()` is superseded in favour of `summarise()`. * `sample_n()` and `sample_frac()` have been superseded by `slice_sample()`. See `?sample_n` for details about why, and for examples converting from old to new usage. * `top_n()` has been superseded by `slice_min()`/`slice_max()`. See `?top_n` for details about why, and how to convert old to new usage (#4494). 
### Questioning * `all_equal()` is questioning; it solves a problem that no longer seems important. ### Stable * `rowwise()` is no longer questioning. ## Documentation improvements * New `vignette("base")` which describes how dplyr verbs relate to the base R equivalents (@sastoudt, #4755) * New `vignette("grouping")` gives more details about how dplyr verbs change when applied to grouped data frames (#4779, @MikeKSmith). * `vignette("programming")` has been completely rewritten to reflect our latest vocabulary, the most recent rlang features, and our current recommendations. It should now be substantially easier to program with dplyr. ## Minor improvements and bug fixes * dplyr now has a rudimentary, experimental, and stop-gap, extension mechanism documented in `?dplyr_extending` * dplyr no longer provides a `all.equal.tbl_df()` method. It never should have done so in the first place because it owns neither the generic nor the class. It also provided a problematic implementation because, by default, it ignored the order of the rows and the columns which is usually important. This is likely to cause new test failures in downstream packages; but on the whole we believe those failures to either reflect unexpected behaviour or tests that need to be strengthened (#2751). * `coalesce()` now uses vctrs recycling and common type coercion rules (#5186). * `count()` and `add_count()` do a better job of preserving input class and attributes (#4086). * `distinct()` errors if you request it use variables that don't exist (this was previously a warning) (#4656). * `filter()`, `mutate()` and `summarise()` get better error messages. * `filter()` handles data frame results when all columns are logical vectors by reducing them with `&` (#4678). In particular this means `across()` can be used in `filter()`. * `left_join()`, `right_join()`, and `full_join()` gain a `keep` argument so that you can optionally choose to keep both sets of join keys (#4589). 
This is useful when you want to figure out which rows were missing from either side. * Join functions can now perform a cross-join by specifying `by = character()` (#4206). * `groups()` now returns `list()` for ungrouped data; previously it returned `NULL` which was type-unstable (when there are groups it returns a list of symbols). * The first argument of `group_map()`, `group_modify()` and `group_walk()` has been changed to `.data` for consistency with other generics. * `group_keys.rowwise_df()` gives a 0 column data frame with `n()` rows. * `group_map()` is now a generic (#4576). * `group_by(..., .add = TRUE)` replaces `group_by(..., add = TRUE)`, with a deprecation message. The old argument name was a mistake because it prevents you from creating a new grouping var called `add` and it violates our naming conventions (#4137). * `intersect()`, `union()`, `setdiff()` and `setequal()` generics are now imported from the generics package. This reduces a conflict with lubridate. * `order_by()` gives an informative hint if you accidentally call it instead of `arrange()` (#3357). * `tally()` and `count()` now message if the default output `name` (n) already exists in the data frame. To quiet the message, you'll need to supply an explicit `name` (#4284). You can override the default weighting to use a constant by setting `wt = 1`. * `starwars` dataset now does a better job of separating biological sex from gender identity. The previous `gender` column has been renamed to `sex`, since it actually describes the individual's biological sex. A new `gender` column encodes the actual gender identity using other information about the Star Wars universe (@MeganBeckett, #4456). * `src_tbls()` accepts `...` arguments (#4485, @ianmcook). This could be a breaking change for some dplyr backend packages that implement `src_tbls()`. * Better performance for extracting slices of factors and ordered factors (#4501). 
* `rename_at()` and `rename_all()` call the function with a simple character vector, not a `dplyr_sel_vars` (#4459). * `ntile()` is now more consistent with database implementations if the buckets have irregular size (#4495). # dplyr 0.8.5 (2020-03-07) * Maintenance release for compatibility with R-devel. # dplyr 0.8.4 (2020-01-30) * Adapt tests to changes in dependent packages. # dplyr 0.8.3 (2019-07-04) * Fixed performance regression introduced in version 0.8.2 (#4458). # dplyr 0.8.2 (2019-06-28) ## New functions * `top_frac(data, proportion)` is a shorthand for `top_n(data, proportion * n())` (#4017). ## colwise changes * Using quosures in colwise verbs is deprecated (#4330). * Updated `distinct_if()`, `distinct_at()` and `distinct_all()` to include `.keep_all` argument (@beansrowning, #4343). * `rename_at()` handles empty selection (#4324). * `*_if()` functions correctly handle columns with special names (#4380). * colwise functions support constants in formulas (#4374). ## Hybrid evaluation changes * hybrid rank functions correctly handle NA (#4427). * `first()`, `last()` and `nth()` hybrid version handles factors (#4295). ## Minor changes * `top_n()` quotes its `n` argument, `n` no longer needs to be constant for all groups (#4017). * `tbl_vars()` keeps information on grouping columns by returning a `dplyr_sel_vars` object (#4106). * `group_split()` always sets the `ptype` attribute, which makes it more robust in the case where there are 0 groups. * `group_map()` and `group_modify()` work in the 0 group edge case (#4421). * `select.list()` method added so that `select()` does not dispatch on lists (#4279). * `view()` is reexported from tibble (#4423). * `group_by()` puts NA groups last in character vectors (#4227). * `arrange()` handles integer64 objects (#4366). * `summarise()` correctly resolves summarised list columns (#4349). 
# dplyr 0.8.1 (2019-05-14) ## Breaking changes * `group_modify()` is the new name of the function previously known as `group_map()`. ## New functions * `group_map()` now only calls the function on each group and returns a list. * `group_by_drop_default()`, previously known as `dplyr:::group_drops()`, is exported (#4245). ## Minor changes * Lists of formulas passed to colwise verbs are now automatically named. * `group_by()` does a shallow copy even in the no groups case (#4221). * Fixed `mutate()` on rowwise data frames with 0 rows (#4224). * Fixed handling of bare formulas in colwise verbs (#4183). * Fixed performance of `n_distinct()` (#4202). * `group_indices()` now ignores empty groups by default for `data.frame`, which is consistent with the default of `group_by()` (@yutannihilation, #4208). * Fixed integer overflow in hybrid `ntile()` (#4186). * colwise functions `summarise_at()` ... can rename vars in the case of multiple functions (#4180). * `select_if()` and `rename_if()` handle logical vector predicate (#4213). * hybrid `min()` and `max()` cast to integer when possible (#4258). * `bind_rows()` correctly handles the cases where there are multiple consecutive `NULL` (#4296). * Support for R 3.1.* has been dropped. The minimal R version supported is now 3.2.0. https://www.tidyverse.org/articles/2019/04/r-version-support/ * `rename_at()` handles empty selection (#4324). # dplyr 0.8.0.1 (2019-02-15) * Fixed integer C/C++ division, forced release by CRAN (#4185). # dplyr 0.8.0 (2019-02-14) ## Breaking changes * The error `could not find function "n"` or the warning ```Calling `n()` without importing or prefixing it is deprecated, use `dplyr::n()` ``` indicates when functions like `n()`, `row_number()`, ... are not imported or prefixed. 
The easiest fix is to import dplyr with `import(dplyr)` in your `NAMESPACE` or `#' @import dplyr` in a roxygen comment, alternatively such functions can be imported selectively as any other function with `importFrom(dplyr, n)` in the `NAMESPACE` or `#' @importFrom dplyr n` in a roxygen comment. The third option is to prefix them, i.e. use `dplyr::n()` * If you see `checking S3 generic/method consistency` in R CMD check for your package, note that : - `sample_n()` and `sample_frac()` have gained `...` - `filter()` and `slice()` have gained `.preserve` - `group_by()` has gained `.drop` * ```Error: `.data` is a corrupt grouped_df, ...``` signals code that makes wrong assumptions about the internals of a grouped data frame. ## New functions * New selection helpers `group_cols()`. It can be called in selection contexts such as `select()` and matches the grouping variables of grouped tibbles. * `last_col()` is re-exported from tidyselect (#3584). * `group_trim()` drops unused levels of factors that are used as grouping variables. * `nest_join()` creates a list column of the matching rows. `nest_join()` + `tidyr::unnest()` is equivalent to `inner_join` (#3570). ```r band_members %>% nest_join(band_instruments) ``` * `group_nest()` is similar to `tidyr::nest()` but focusing on the variables to nest by instead of the nested columns. ```r starwars %>% group_by(species, homeworld) %>% group_nest() starwars %>% group_nest(species, homeworld) ``` * `group_split()` is similar to `base::split()` but operating on existing groups when applied to a grouped data frame, or subject to the data mask on ungrouped data frames ```r starwars %>% group_by(species, homeworld) %>% group_split() starwars %>% group_split(species, homeworld) ``` * `group_map()` and `group_walk()` are purrr-like functions to iterate on groups of a grouped data frame, jointly identified by the data subset (exposed as `.x`) and the data key (a one row tibble, exposed as `.y`). 
`group_map()` returns a grouped data frame that combines the results of the function, `group_walk()` is only used for side effects and returns its input invisibly. ```r mtcars %>% group_by(cyl) %>% group_map(~ head(.x, 2L)) ``` * `distinct_prepare()`, previously known as `distinct_vars()` is exported. This is mostly useful for alternative backends (e.g. `dbplyr`). ## Major changes * `group_by()` gains the `.drop` argument. When set to `FALSE` the groups are generated based on factor levels, hence some groups may be empty (#341). ```r # 3 groups tibble( x = 1:2, f = factor(c("a", "b"), levels = c("a", "b", "c")) ) %>% group_by(f, .drop = FALSE) # the order of the grouping variables matter df <- tibble( x = c(1,2,1,2), f = factor(c("a", "b", "a", "b"), levels = c("a", "b", "c")) ) df %>% group_by(f, x, .drop = FALSE) df %>% group_by(x, f, .drop = FALSE) ``` The default behaviour drops the empty groups as in the previous versions. ```r tibble( x = 1:2, f = factor(c("a", "b"), levels = c("a", "b", "c")) ) %>% group_by(f) ``` * `filter()` and `slice()` gain a `.preserve` argument to control which groups it should keep. The default `filter(.preserve = FALSE)` recalculates the grouping structure based on the resulting data, otherwise it is kept as is. ```r df <- tibble( x = c(1,2,1,2), f = factor(c("a", "b", "a", "b"), levels = c("a", "b", "c")) ) %>% group_by(x, f, .drop = FALSE) df %>% filter(x == 1) df %>% filter(x == 1, .preserve = TRUE) ``` * The notion of lazily grouped data frames have disappeared. All dplyr verbs now recalculate immediately the grouping structure, and respect the levels of factors. * Subsets of columns now properly dispatch to the `[` or `[[` method when the column is an object (a vector with a class) instead of making assumptions on how the column should be handled. The `[` method must handle integer indices, including `NA_integer_`, i.e. `x[NA_integer_]` should produce a vector of the same class as `x` with whatever represents a missing value. 
## Minor changes * `tally()` works correctly on non-data frame table sources such as `tbl_sql` (#3075). * `sample_n()` and `sample_frac()` can use `n()` (#3527) * `distinct()` respects the order of the variables provided (#3195, @foo-bar-baz-qux) and handles the 0 rows and 0 columns special case (#2954). * `combine()` uses tidy dots (#3407). * `group_indices()` can be used without argument in expressions in verbs (#1185). * Using `mutate_all()`, `transmute_all()`, `mutate_if()` and `transmute_if()` with grouped tibbles now informs you that the grouping variables are ignored. In the case of the `_all()` verbs, the message invites you to use `mutate_at(df, vars(-group_cols()))` (or the equivalent `transmute_at()` call) instead if you'd like to make it explicit in your code that the operation is not applied on the grouping variables. * Scoped variants of `arrange()` respect the `.by_group` argument (#3504). * `first()` and `last()` hybrid functions fall back to R evaluation when given no arguments (#3589). * `mutate()` removes a column when the expression evaluates to `NULL` for all groups (#2945). * grouped data frames support `[, drop = TRUE]` (#3714). * New low-level constructor `new_grouped_df()` and validator `validate_grouped_df` (#3837). * `glimpse()` prints group information on grouped tibbles (#3384). * `sample_n()` and `sample_frac()` gain `...` (#2888). * Scoped filter variants now support functions and purrr-like lambdas: ```r mtcars %>% filter_at(vars(hp, vs), ~ . %% 2 == 0) ``` ## Lifecycle * `do()`, `rowwise()` and `combine()` are questioning (#3494). * `funs()` is soft-deprecated and will start issuing warnings in a future version. ## Changes to column wise functions * Scoped variants for `distinct()`: `distinct_at()`, `distinct_if()`, `distinct_all()` (#2948). * `summarise_at()` excludes the grouping variables (#3613). * `mutate_all()`, `mutate_at()`, `summarise_all()` and `summarise_at()` handle utf-8 names (#2967). 
## Performance * R expressions that cannot be handled with native code are now evaluated with unwind-protection when available (on R 3.5 and later). This improves the performance of dplyr on data frames with many groups (and hence many expressions to evaluate). We benchmarked that computing a grouped average is consistently twice as fast with unwind-protection enabled. Unwind-protection also makes dplyr more robust in corner cases because it ensures the C++ destructors are correctly called in all circumstances (debugger exit, captured condition, restart invocation). * `sample_n()` and `sample_frac()` gain `...` (#2888). * Improved performance for wide tibbles (#3335). * Faster hybrid `sum()`, `mean()`, `var()` and `sd()` for logical vectors (#3189). * Hybrid version of `sum(na.rm = FALSE)` exits early when there are missing values. This considerably improves performance when there are missing values early in the vector (#3288). * `group_by()` does not trigger the additional `mutate()` on simple uses of the `.data` pronoun (#3533). ## Internal * The grouping metadata of grouped data frame has been reorganized in a single tidy tibble, that can be accessed with the new `group_data()` function. The grouping tibble consists of one column per grouping variable, followed by a list column of the (1-based) indices of the groups. The new `group_rows()` function retrieves that list of indices (#3489). ```r # the grouping metadata, as a tibble group_by(starwars, homeworld) %>% group_data() # the indices group_by(starwars, homeworld) %>% group_data() %>% pull(.rows) group_by(starwars, homeworld) %>% group_rows() ``` * Hybrid evaluation has been completely redesigned for better performance and stability. ## Documentation * Add documentation example for moving variable to back in `?select` (#3051). * column wise functions are better documented, in particular explaining when grouping variables are included as part of the selection. 
### Deprecated and defunct functions * `mutate_each()` and `summarise_each()` are deprecated. # dplyr 0.7.6 * `exprs()` is no longer exported to avoid conflicts with `Biobase::exprs()` (#3638). * The MASS package is explicitly suggested to fix CRAN warnings on R-devel (#3657). * Set operations like `intersect()` and `setdiff()` reconstruct groups metadata (#3587) and keep the order of the rows (#3839). * Using namespaced calls to `base::sort()` and `base::unique()` from C++ code to avoid ambiguities when these functions are overridden (#3644). * Fix rchk errors (#3693). # dplyr 0.7.5 (2018-04-14) ## Breaking changes for package developers * The major change in this version is that dplyr now depends on the selecting backend of the tidyselect package. If you have been linking to `dplyr::select_helpers` documentation topic, you should update the link to point to `tidyselect::select_helpers`. * Another change that causes warnings in packages is that dplyr now exports the `exprs()` function. This causes a collision with `Biobase::exprs()`. Either import functions from dplyr selectively rather than in bulk, or do not import `Biobase::exprs()` and refer to it with a namespace qualifier. ## Bug fixes * `distinct(data, "string")` now returns a one-row data frame again. (The previous behavior was to return the data unchanged.) * `do()` operations with more than one named argument can access `.` (#2998). * Reindexing grouped data frames (e.g. after `filter()` or `..._join()`) never updates the `"class"` attribute. This also avoids unintended updates to the original object (#3438). * Fixed rare column name clash in `..._join()` with non-join columns of the same name in both tables (#3266). * Fix `ntile()` and `row_number()` ordering to use the locale-dependent ordering functions in R when dealing with character vectors, rather than always using the C-locale ordering function in C (#2792, @foo-bar-baz-qux). 
* Summaries of summaries (such as `summarise(b = sum(a), c = sum(b))`) are now computed using standard evaluation for simplicity and correctness, but slightly slower (#3233). * Fixed `summarise()` for empty data frames with zero columns (#3071). ## Major changes * `enexpr()`, `expr()`, `exprs()`, `sym()` and `syms()` are now exported. `sym()` and `syms()` construct symbols from strings or character vectors. The `expr()` variants are equivalent to `quo()`, `quos()` and `enquo()` but return simple expressions rather than quosures. They support quasiquotation. * dplyr now depends on the new tidyselect package to power `select()`, `rename()`, `pull()` and their variants (#2896). Consequently `select_vars()`, `select_var()` and `rename_vars()` are soft-deprecated and will start issuing warnings in a future version. Following the switch to tidyselect, `select()` and `rename()` fully support character vectors. You can now unquote variables like this: ``` vars <- c("disp", "cyl") select(mtcars, !! vars) select(mtcars, -(!! vars)) ``` Note that this only works in selecting functions because in other contexts strings and character vectors are ambiguous. For instance strings are a valid input in mutating operations and `mutate(df, "foo")` creates a new column by recycling "foo" to the number of rows. ## Minor changes * Support for raw vector columns in `arrange()`, `group_by()`, `mutate()`, `summarise()` and `..._join()` (minimal `raw` x `raw` support initially) (#1803). * `bind_cols()` handles unnamed list (#3402). * `bind_rows()` works around corrupt columns that have the object bit set while having no class attribute (#3349). * `combine()` returns `logical()` when all inputs are `NULL` (or when there are no inputs) (#3365, @zeehio). * `distinct()` now supports renaming columns (#3234). * Hybrid evaluation simplifies `dplyr::foo()` to `foo()` (#3309). Hybrid functions can now be masked by regular R functions to turn off hybrid evaluation (#3255). 
The hybrid evaluator finds functions from dplyr even if dplyr is not attached (#3456). * In `mutate()` it is now illegal to use `data.frame` in the rhs (#3298). * Support `!!!` in `recode_factor()` (#3390). * `row_number()` works on empty subsets (#3454). * `select()` and `vars()` now treat `NULL` as empty inputs (#3023). * Scoped select and rename functions (`select_all()`, `rename_if()` etc.) now work with grouped data frames, adapting the grouping as necessary (#2947, #3410). `group_by_at()` can group by an existing grouping variable (#3351). `arrange_at()` can use grouping variables (#3332). * `slice()` no longer enforces tibble classes when input is a simple `data.frame`, and ignores 0 (#3297, #3313). * `transmute()` no longer prints a message when including a group variable. ## Documentation * Improved documentation for `funs()` (#3094) and set operations (e.g. `union()`) (#3238, @edublancas). ## Error messages * Better error message if dbplyr is not installed when accessing database backends (#3225). * `arrange()` fails gracefully on `data.frame` columns (#3153). * Corrected error message when calling `cbind()` with an object of wrong length (#3085). * Add warning with explanation to `distinct()` if any of the selected columns are of type `list` (#3088, @foo-bar-baz-qux), or when used on unknown columns (#2867, @foo-bar-baz-qux). * Show clear error message for bad arguments to `funs()` (#3368). * Better error message in `..._join()` when joining data frames with duplicate or `NA` column names. Joining such data frames with a semi- or anti-join now gives a warning, which may be converted to an error in future versions (#3243, #3417). * Dedicated error message when trying to use columns of the `Interval` or `Period` classes (#2568). * Added an `.onDetach()` hook that allows for plyr to be loaded and attached without the warning message that says functions in dplyr will be masked, since dplyr is no longer attached (#3359, @jwnorman). 
## Performance * `sample_n()` and `sample_frac()` on grouped data frames are now faster, especially for those with a large number of groups (#3193, @saurfang). ## Internal * Compute variable names for joins in R (#3430). * Bumped Rcpp dependency to 0.12.15 to avoid imperfect detection of `NA` values in hybrid evaluation fixed in RcppCore/Rcpp#790 (#2919). * Avoid cleaning the data mask, a temporary environment used to evaluate expressions. If the environment, in which e.g. a `mutate()` expression is evaluated, is preserved until after the operation, accessing variables from that environment now gives a warning but still returns `NULL` (#3318). # dplyr 0.7.4 * Fix recent Fedora and ASAN check errors (#3098). * Avoid dependency on Rcpp 0.12.10 (#3106). # dplyr 0.7.3 * Fixed protection error that occurred when creating a character column using grouped `mutate()` (#2971). * Fixed a rare problem with accessing variable values in `summarise()` when all groups have size one (#3050). * `distinct()` now throws an error when used on unknown columns (#2867, @foo-bar-baz-qux). * Fixed rare out-of-bounds memory write in `slice()` when negative indices beyond the number of rows were involved (#3073). * `select()`, `rename()` and `summarise()` no longer change the grouped vars of the original data (#3038). * `nth(default = var)`, `first(default = var)` and `last(default = var)` fall back to standard evaluation in a grouped operation instead of triggering an error (#3045). * `case_when()` now works if all LHS are atomic (#2909), or when LHS or RHS values are zero-length vectors (#3048). * `case_when()` accepts `NA` on the LHS (#2927). * Semi- and anti-joins now preserve the order of left-hand-side data frame (#3089). * Improved error message for invalid list arguments to `bind_rows()` (#3068). * Grouping by character vectors is now faster (#2204). * Fixed a crash that occurred when an unexpected input was supplied to the `call` argument of `order_by()` (#3065). 
# dplyr 0.7.2 * Move build-time vs. run-time checks out of `.onLoad()` and into `dr_dplyr()`. # dplyr 0.7.1 * Use new versions of bindrcpp and glue to avoid protection problems. Avoid wrapping arguments to internal error functions (#2877). Fix two protection mistakes found by rchk (#2868). * Fix C++ error that caused compilation to fail on mac cran (#2862) * Fix undefined behaviour in `between()`, where `NA_REAL` were assigned instead of `NA_LOGICAL`. (#2855, @zeehio) * `top_n()` now executes operations lazily for compatibility with database backends (#2848). * Reuse of new variables created in ungrouped `mutate()` possible again, regression introduced in dplyr 0.7.0 (#2869). * Quosured symbols do not prevent hybrid handling anymore. This should fix many performance issues introduced with tidyeval (#2822). # dplyr 0.7.0 ## New data, functions, and features * Five new datasets provide some interesting built-in datasets to demonstrate dplyr verbs (#2094): * `starwars` dataset about starwars characters; has list columns * `storms` has the trajectories of ~200 tropical storms * `band_members`, `band_instruments` and `band_instruments2` have some simple data to demonstrate joins. * New `add_count()` and `add_tally()` for adding an `n` column within groups (#2078, @dgrtwo). * `arrange()` for grouped data frames gains a `.by_group` argument so you can choose to sort by groups if you want to (defaults to `FALSE`) (#2318) * New `pull()` generic for extracting a single column either by name or position (either from the left or the right). Thanks to @paulponcet for the idea (#2054). This verb is powered with the new `select_var()` internal helper, which is exported as well. It is like `select_vars()` but returns a single variable. * `as_tibble()` is re-exported from tibble. This is the recommended way to create tibbles from existing data frames. `tbl_df()` has been softly deprecated. 
`tribble()` is now imported from tibble (#2336, @chrMongeau); this is now preferred to `frame_data()`. ## Deprecated and defunct * dplyr no longer messages that you need dtplyr to work with data.table (#2489). * Long deprecated `regroup()`, `mutate_each_q()` and `summarise_each_q()` functions have been removed. * Deprecated `failwith()`. I'm not even sure why it was here. * Soft-deprecated `mutate_each()` and `summarise_each()`, these functions print a message which will be changed to a warning in the next release. * The `.env` argument to `sample_n()` and `sample_frac()` is defunct, passing a value to this argument prints a message which will be changed to a warning in the next release. ## Databases This version of dplyr includes some major changes to how database connections work. By and large, you should be able to continue using your existing dplyr database code without modification, but there are two big changes that you should be aware of: * Almost all database related code has been moved out of dplyr and into a new package, [dbplyr](https://github.com/tidyverse/dbplyr/). This makes dplyr simpler, and will make it easier to release fixes for bugs that only affect databases. `src_mysql()`, `src_postgres()`, and `src_sqlite()` will still live in dplyr so your existing code continues to work. * It is no longer necessary to create a remote "src". Instead you can work directly with the database connection returned by DBI. This reflects the maturity of the DBI ecosystem. Thanks largely to the work of Kirill Müller (funded by the R Consortium) DBI backends are now much more consistent, comprehensive, and easier to use. That means that there's no longer a need for a layer in between you and DBI. 
You can continue to use `src_mysql()`, `src_postgres()`, and `src_sqlite()`, but I recommend a new style that makes the connection to DBI more clear: ```R library(dplyr) con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") DBI::dbWriteTable(con, "mtcars", mtcars) mtcars2 <- tbl(con, "mtcars") mtcars2 ``` This is particularly useful if you want to perform non-SELECT queries as you can do whatever you want with `DBI::dbGetQuery()` and `DBI::dbExecute()`. If you've implemented a database backend for dplyr, please read the [backend news](https://github.com/tidyverse/dbplyr/blob/main/NEWS.md#backends) to see what's changed from your perspective (not much). If you want to ensure your package works with both the current and previous version of dplyr, see `wrap_dbplyr_obj()` for helpers. ## UTF-8 * Internally, column names are always represented as character vectors, and not as language symbols, to avoid encoding problems on Windows (#1950, #2387, #2388). * Error messages and explanations of data frame inequality are now encoded in UTF-8, also on Windows (#2441). * Joins now always reencode character columns to UTF-8 if necessary. This gives a nice speedup, because now pointer comparison can be used instead of string comparison, but relies on a proper encoding tag for all strings (#2514). * Fixed problems when joining factor or character encodings with a mix of native and UTF-8 encoded values (#1885, #2118, #2271, #2451). * Fix `group_by()` for data frames that have UTF-8 encoded names (#2284, #2382). * New `group_vars()` generic that returns the grouping as character vector, to avoid the potentially lossy conversion to language symbols. The list returned by `group_by_prepare()` now has a new `group_names` component (#1950, #2384). ## Colwise functions * `rename()`, `select()`, `group_by()`, `filter()`, `arrange()` and `transmute()` now have scoped variants (verbs suffixed with `_if()`, `_at()` and `_all()`). 
Like `mutate_all()`, `summarise_if()`, etc, these variants apply an operation to a selection of variables. * The scoped verbs taking predicates (`mutate_if()`, `summarise_if()`, etc) now support S3 objects and lazy tables. S3 objects should implement methods for `length()`, `[[` and `tbl_vars()`. For lazy tables, the first 100 rows are collected and the predicate is applied on this subset of the data. This is robust for the common case of checking the type of a column (#2129). * Summarise and mutate colwise functions pass `...` on to the manipulation functions. * The performance of colwise verbs like `mutate_all()` is now back to where it was in `mutate_each()`. * `funs()` has better handling of namespaced functions (#2089). * Fix issue with `mutate_if()` and `summarise_if()` when a predicate function returns a vector of `FALSE` (#1989, #2009, #2011). ## Tidyeval dplyr has a new approach to non-standard evaluation (NSE) called tidyeval. It is described in detail in `vignette("programming")` but, in brief, gives you the ability to interpolate values in contexts where dplyr usually works with expressions: ```{r} my_var <- quo(homeworld) starwars %>% group_by(!!my_var) %>% summarise_at(vars(height:mass), mean, na.rm = TRUE) ``` This means that the underscored version of each main verb is no longer needed, and so these functions have been deprecated (but remain around for backward compatibility). * `order_by()`, `top_n()`, `sample_n()` and `sample_frac()` now use tidyeval to capture their arguments by expression. This makes it possible to use unquoting idioms (see `vignette("programming")`) and fixes scoping issues (#2297). * Most verbs taking dots now ignore the last argument if empty. This makes it easier to copy lines of code without having to worry about deleting trailing commas (#1039). 
* [API] The new `.data` and `.env` environments can be used inside all verbs that operate on data: `.data$column_name` accesses the column `column_name`, whereas `.env$var` accesses the external variable `var`. Columns or external variables named `.data` or `.env` are shadowed, use `.data$...` and/or `.env$...` to access them. (`.data` implements strict matching also for the `$` operator (#2591).) The `column()` and `global()` functions have been removed. They were never documented officially. Use the new `.data` and `.env` environments instead. * Expressions in verbs are now interpreted correctly in many cases that failed before (e.g., use of `$`, `case_when()`, nonstandard evaluation, ...). These expressions are now evaluated in a specially constructed temporary environment that retrieves column data on demand with the help of the `bindrcpp` package (#2190). This temporary environment poses restrictions on assignments using `<-` inside verbs. To prevent leaking of broken bindings, the temporary environment is cleared after the evaluation (#2435). ## Verbs ### Joins * [API] `xxx_join.tbl_df(na_matches = "never")` treats all `NA` values as different from each other (and from any other value), so that they never match. This corresponds to the behavior of joins for database sources, and of database joins in general. To match `NA` values, pass `na_matches = "na"` to the join verbs; this is only supported for data frames. The default is `na_matches = "na"`, kept for the sake of compatibility to v0.5.0. It can be tweaked by calling `pkgconfig::set_config("dplyr::na_matches", "na")` (#2033). * `common_by()` gets a better error message for unexpected inputs (#2091) * Fix groups when joining grouped data frames with duplicate columns (#2330, #2334, @davidkretch). * One of the two join suffixes can now be an empty string, dplyr no longer hangs (#2228, #2445). * Anti- and semi-joins warn if factor levels are inconsistent (#2741). 
* Warnings about join column inconsistencies now contain the column names (#2728). ### Select * For selecting variables, the first selector decides if it's an inclusive selection (i.e., the initial column list is empty), or an exclusive selection (i.e., the initial column list contains all columns). This means that `select(mtcars, contains("am"), contains("FOO"), contains("vs"))` now returns again both `am` and `vs` columns like in dplyr 0.4.3 (#2275, #2289, @r2evans). * Select helpers now throw an error if called when no variables have been set (#2452) * Helper functions in `select()` (and related verbs) are now evaluated in a context where column names do not exist (#2184). * `select()` (and the internal function `select_vars()`) now support column names in addition to column positions. As a result, expressions like `select(mtcars, "cyl")` are now allowed. ### Other * `recode()`, `case_when()` and `coalesce()` now support splicing of arguments with rlang's `!!!` operator. * `count()` now preserves the grouping of its input (#2021). * `distinct()` no longer duplicates variables (#2001). * Empty `distinct()` with a grouped data frame works the same way as an empty `distinct()` on an ungrouped data frame, namely it uses all variables (#2476). * `copy_to()` now returns its output invisibly (since you're often just calling for the side-effect). * `filter()` and `lag()` throw informative error if used with ts objects (#2219) * `mutate()` recycles list columns of length 1 (#2171). * `mutate()` gives better error message when attempting to add a non-vector column (#2319), or attempting to remove a column with `NULL` (#2187, #2439). * `summarise()` now correctly evaluates newly created factors (#2217), and can create ordered factors (#2200). * Ungrouped `summarise()` uses summary variables correctly (#2404, #2453). * Grouped `summarise()` no longer converts character `NA` to empty strings (#1839). 
## Combining and comparing * `all_equal()` now reports multiple problems as a character vector (#1819, #2442). * `all_equal()` checks that factor levels are equal (#2440, #2442). * `bind_rows()` and `bind_cols()` give an error for database tables (#2373). * `bind_rows()` works correctly with `NULL` arguments and an `.id` argument (#2056), and also for zero-column data frames (#2175). * Breaking change: `bind_rows()` and `combine()` are more strict when coercing. Logical values are no longer coerced to integer and numeric. Date, POSIXct and other integer or double-based classes are no longer coerced to integer or double as there is chance of attributes or information being lost (#2209, @zeehio). * `bind_cols()` now calls `tibble::repair_names()` to ensure that all names are unique (#2248). * `bind_cols()` handles empty argument list (#2048). * `bind_cols()` better handles `NULL` inputs (#2303, #2443). * `bind_rows()` explicitly rejects columns containing data frames (#2015, #2446). * `bind_rows()` and `bind_cols()` now accept vectors. They are treated as rows by the former and columns by the latter. Rows require inner names like `c(col1 = 1, col2 = 2)`, while columns require outer names: `col1 = c(1, 2)`. Lists are still treated as data frames but can be spliced explicitly with `!!!`, e.g. `bind_rows(!!! x)` (#1676). * `rbind_list()` and `rbind_all()` now call `.Deprecated()`, they will be removed in the next CRAN release. Please use `bind_rows()` instead. * `combine()` accepts `NA` values (#2203, @zeehio) * `combine()` and `bind_rows()` with character and factor types now always warn about the coercion to character (#2317, @zeehio) * `combine()` and `bind_rows()` accept `difftime` objects. * `mutate` coerces results from grouped dataframes accepting combinable data types (such as `integer` and `numeric`). (#1892, @zeehio) ## Vector functions * `%in%` gets new hybrid handler (#126). * `between()` returns NA if `left` or `right` is `NA` (fixes #2562). 
* `case_when()` supports `NA` values (#2000, @tjmahr). * `first()`, `last()`, and `nth()` have better default values for factor, Dates, POSIXct, and data frame inputs (#2029). * Fixed segmentation faults in hybrid evaluation of `first()`, `last()`, `nth()`, `lead()`, and `lag()`. These functions now always fall back to the R implementation if called with arguments that the hybrid evaluator cannot handle (#948, #1980). * `n_distinct()` gets larger hash tables, giving slightly better performance (#977). * `nth()` and `ntile()` are more careful about proper data types of their return values (#2306). * `ntile()` ignores `NA` when computing group membership (#2564). * `lag()` enforces integer `n` (#2162, @kevinushey). * hybrid `min()` and `max()` now always return a `numeric` and work correctly in edge cases (empty input, all `NA`, ...) (#2305, #2436). * `min_rank("string")` no longer segfaults in hybrid evaluation (#2279, #2444). * `recode()` can now recode a factor to other types (#2268). * `recode()` gains `.dots` argument to support passing replacements as a list (#2110, @jlegewie). ## Other minor changes and bug fixes * Many error messages are more helpful by referring to a column name or a position in the argument list (#2448). * New `is_grouped_df()` alias to `is.grouped_df()`. * `tbl_vars()` now has a `group_vars` argument set to `TRUE` by default. If `FALSE`, group variables are not returned. * Fixed segmentation fault after calling `rename()` on an invalid grouped data frame (#2031). * `rename_vars()` gains a `strict` argument to control if an error is thrown when you try and rename a variable that doesn't exist. * Fixed undefined behavior for `slice()` on a zero-column data frame (#2490). * Fixed very rare case of false match during join (#2515). * Restricted workaround for `match()` to R 3.3.0. (#1858). * dplyr now warns on load when the version of R or Rcpp during installation is different to the currently installed version (#2514). 
* Fixed improper reuse of attributes when creating a list column in `summarise()` and perhaps `mutate()` (#2231). * `mutate()` and `summarise()` always strip the `names` attribute from new or updated columns, even for ungrouped operations (#1689). * Fixed rare error that could lead to a segmentation fault in `all_equal(ignore_col_order = FALSE)` (#2502). * The "dim" and "dimnames" attributes are always stripped when copying a vector (#1918, #2049). * `grouped_df` and `rowwise` are registered officially as S3 classes. This makes them easier to use with S4 (#2276, @joranE, #2789). * All operations that return tibbles now include the `"tbl"` class. This is important for correct printing with tibble 1.3.1 (#2789). * Makeflags uses PKG_CPPFLAGS for defining preprocessor macros. * astyle formatting for C++ code, tested but not changed as part of the tests (#2086, #2103). * Update RStudio project settings to install tests (#1952). * Using `Rcpp::interfaces()` to register C callable interfaces, and registering all native exported functions via `R_registerRoutines()` and `useDynLib(.registration = TRUE)` (#2146). * Formatting of grouped data frames now works by overriding the `tbl_sum()` generic instead of `print()`. This means that the output is more consistent with tibble, and that `format()` is now supported also for SQL sources (#2781). # dplyr 0.5.0 ## Breaking changes ### Existing functions * `arrange()` once again ignores grouping (#1206). * `distinct()` now only keeps the distinct variables. If you want to return all variables (using the first row for non-distinct values) use `.keep_all = TRUE` (#1110). For SQL sources, `.keep_all = FALSE` is implemented using `GROUP BY`, and `.keep_all = TRUE` raises an error (#1937, #1942, @krlmlr). (The default behaviour of using all variables when none are specified remains - this note only applies if you select some variables). * The select helper functions `starts_with()`, `ends_with()` etc are now real exported functions. 
This means that you'll need to import those functions if you're using them from a package where dplyr is not attached. i.e. `dplyr::select(mtcars, starts_with("m"))` used to work, but now you'll need `dplyr::select(mtcars, dplyr::starts_with("m"))`. ### Deprecated and defunct functions * The long deprecated `chain()`, `chain_q()` and `%.%` have been removed. Please use `%>%` instead. * `id()` has been deprecated. Please use `group_indices()` instead (#808). * `rbind_all()` and `rbind_list()` are formally deprecated. Please use `bind_rows()` instead (#803). * Outdated benchmarking demos have been removed (#1487). * Code related to starting and signalling clusters has been moved out to [multidplyr](https://github.com/tidyverse/multidplyr). ## New functions * `coalesce()` finds the first non-missing value from a set of vectors. (#1666, thanks to @krlmlr for initial implementation). * `case_when()` is a general vectorised if + else if (#631). * `if_else()` is a vectorised if statement: it's a stricter (type-safe), faster, and more predictable version of `ifelse()`. In SQL it is translated to a `CASE` statement. * `na_if()` makes it easy to replace a certain value with an `NA` (#1707). In SQL it is translated to `NULL_IF`. * `near(x, y)` is a helper for `abs(x - y) < tol` (#1607). * `recode()` is a vectorised equivalent to `switch()` (#1710). * `union_all()` method. Maps to `UNION ALL` for SQL sources, `bind_rows()` for data frames/tbl\_dfs, and `combine()` for vectors (#1045). * A new family of functions replace `summarise_each()` and `mutate_each()` (which will thus be deprecated in a future release). `summarise_all()` and `mutate_all()` apply a function to all columns while `summarise_at()` and `mutate_at()` operate on a subset of columns. These columns are selected with either a character vector of column names, a numeric vector of column positions, or a column specification with `select()` semantics generated by the new `columns()` helper. 
In addition, `summarise_if()` and `mutate_if()` take a predicate function or a logical vector (these verbs currently require local sources). All these functions can now take ordinary functions instead of a list of functions generated by `funs()` (though this is only useful for local sources). (#1845, @lionel-) * `select_if()` lets you select columns with a predicate function. Only compatible with local sources. (#497, #1569, @lionel-) ## Local backends ### dtplyr All data table related code has been separated out into a new dtplyr package. This decouples the development of the data.table interface from the development of the dplyr package. If both data.table and dplyr are loaded, you'll get a message reminding you to load dtplyr. ### Tibble Functions related to the creation and coercion of `tbl_df`s, now live in their own package: [tibble](https://posit.co/blog/tibble-1-0-0/). See `vignette("tibble")` for more details. * `$` and `[[` methods that never do partial matching (#1504), and throw an error if the variable does not exist. * `all_equal()` allows to compare data frames ignoring row and column order, and optionally ignoring minor differences in type (e.g. int vs. double) (#821). The test handles the case where the df has 0 columns (#1506). The test fails when convert is `FALSE` and types don't match (#1484). * `all_equal()` shows better error message when comparing raw values or when types are incompatible and `convert = TRUE` (#1820, @krlmlr). * `add_row()` makes it easy to add a new row to a data frame (#1021) * `as_data_frame()` is now an S3 generic with methods for lists (the old `as_data_frame()`), data frames (trivial), and matrices (with efficient C++ implementation) (#876). It no longer strips subclasses. * The internals of `data_frame()` and `as_data_frame()` have been aligned, so `as_data_frame()` will now automatically recycle length-1 vectors. 
Both functions give more informative error messages if you attempt to create an invalid data frame. You can no longer create a data frame with duplicated names (#820). Both check for `POSIXlt` columns, and tell you to use `POSIXct` instead (#813). * `frame_data()` properly constructs rectangular tables (#1377, @kevinushey), and supports list-cols. * `glimpse()` is now a generic. The default method dispatches to `str()` (#1325). It now (invisibly) returns its first argument (#1570). * `lst()` and `lst_()` which create lists in the same way that `data_frame()` and `data_frame_()` create data frames (#1290). * `print.tbl_df()` is considerably faster if you have very wide data frames. It will now also only list the first 100 additional variables not already on screen - control this with the new `n_extra` parameter to `print()` (#1161). When printing a grouped data frame the number of groups is now printed with thousands separators (#1398). The type of list columns is correctly printed (#1379) * Package includes `setOldClass(c("tbl_df", "tbl", "data.frame"))` to help with S4 dispatch (#969). * `tbl_df` automatically generates column names (#1606). ### tbl_cube * new `as_data_frame.tbl_cube()` (#1563, @krlmlr). * `tbl_cube`s are now constructed correctly from data frames, duplicate dimension values are detected, missing dimension values are filled with `NA`. The construction from data frames now guesses the measure variables by default, and allows specification of dimension and/or measure variables (#1568, @krlmlr). * Swap order of `dim_names` and `met_name` arguments in `as.tbl_cube` (for `array`, `table` and `matrix`) for consistency with `tbl_cube` and `as.tbl_cube.data.frame`. Also, the `met_name` argument to `as.tbl_cube.table` now defaults to `"Freq"` for consistency with `as.data.frame.table` (@krlmlr, #1374). ## Remote backends * `as_data_frame()` on SQL sources now returns all rows (#1752, #1821, @krlmlr). 
* `compute()` gets new parameters `indexes` and `unique_indexes` that make it easier to add indexes (#1499, @krlmlr). * `db_explain()` gains a default method for DBIConnections (#1177). * The backend testing system has been improved. This led to the removal of `temp_srcs()`. In the unlikely event that you were using this function, you can instead use `test_register_src()`, `test_load()`, and `test_frame()`. * You can now use `right_join()` and `full_join()` with remote tables (#1172). ### SQLite * `src_memdb()` is a session-local in-memory SQLite database. `memdb_frame()` works like `data_frame()`, but creates a new table in that database. * `src_sqlite()` now uses a stricter quoting character, `` ` ``, instead of `"`. SQLite "helpfully" will convert `"x"` into a string if there is no identifier called x in the current scope (#1426). * `src_sqlite()` throws errors if you try and use it with window functions (#907). ### SQL translation * `filter.tbl_sql()` now puts parens around each argument (#934). * Unary `-` is better translated (#1002). * `escape.POSIXt()` method makes it easier to use date times. The date is rendered in ISO 8601 format in UTC, which should work in most databases (#857). * `is.na()` gets a missing space (#1695). * `if`, `is.na()`, and `is.null()` get extra parens to make precedence more clear (#1695). * `pmin()` and `pmax()` are translated to `MIN()` and `MAX()` (#1711). * Window functions: * Work on ungrouped data (#1061). * Warning if order is not set on cumulative window functions. * Multiple partitions or ordering variables in windowed functions no longer generate extra parentheses, so should work for more databases (#1060) 
Unfortunately this approach was fraught with bugs, so in this version I've implemented a much richer internal data model. Now there is a three step process: 1. When applied to a `tbl_lazy`, each dplyr verb captures its inputs and stores them in an `op` (short for operation) object. 2. `sql_build()` iterates through the operations, building up an object that represents a SQL query. These objects are convenient for testing as they are lists, and are backend agnostic. 3. `sql_render()` iterates through the queries and generates the SQL, using generics (like `sql_select()`) that can vary based on the backend. In the short-term, this increased abstraction is likely to lead to some minor performance decreases, but the chance of dplyr generating correct SQL is much much higher. In the long-term, these abstractions will make it possible to write a query optimiser/compiler in dplyr, which would make it possible to generate much more succinct queries. If you have written a dplyr backend, you'll need to make some minor changes to your package: * `sql_join()` has been considerably simplified - it is now only responsible for generating the join query, not for generating the intermediate selects that rename the variable. Similarly for `sql_semi_join()`. If you've provided new methods in your backend, you'll need to rewrite. * `select_query()` gains a distinct argument which is used for generating queries for `distinct()`. It loses the `offset` argument which was never used (and hence never tested). * `src_translate_env()` has been replaced by `sql_translate_env()` which should have methods for the connection object. There were two other tweaks to the exported API, but these are less likely to affect anyone. * `translate_sql()` and `partial_eval()` got a new API: now use connection + variable names, rather than a `tbl`. This makes testing considerably easier. `translate_sql_q()` has been renamed to `translate_sql_()`. 
* Also note that the sql generation generics now have a default method, instead of methods for DBIConnection and NULL. ## Minor improvements and bug fixes ### Single table verbs * Avoiding segfaults in presence of `raw` columns (#1803, #1817, @krlmlr). * `arrange()` fails gracefully on list columns (#1489) and matrices (#1870, #1945, @krlmlr). * `count()` now adds additional grouping variables, rather than overriding existing (#1703). `tally()` and `count()` can now count a variable called `n` (#1633). Weighted `count()`/`tally()` ignore `NA`s (#1145). * The progress bar in `do()` is now updated at most 20 times per second, avoiding unnecessary redraws (#1734, @mkuhn) * `distinct()` doesn't crash when given a 0-column data frame (#1437). * `filter()` throws an error if you supply a named argument. This is usually a typo: `filter(df, x = 1)` instead of `filter(df, x == 1)` (#1529). * `summarise()` correctly coerces factors with different levels (#1678), handles min/max of already summarised variable (#1622), and supports data frames as columns (#1425). * `select()` now informs you that it adds missing grouping variables (#1511). It works even if the grouping variable has a non-syntactic name (#1138). Negating a failed match (e.g. `select(mtcars, -contains("x"))`) returns all columns, instead of no columns (#1176) The `select()` helpers are now exported and have their own documentation (#1410). `one_of()` gives a useful error message if variable names are not found in data frame (#1407). * The naming behaviour of `summarise_each()` and `mutate_each()` has been tweaked so that you can force inclusion of both the function and the variable name: `summarise_each(mtcars, funs(mean = mean), everything())` (#442). * `mutate()` handles factors that are all `NA` (#1645), or have different levels in different groups (#1414). It disambiguates `NA` and `NaN` (#1448), and silently promotes groups that only contain `NA` (#1463). 
It deep copies data in list columns (#1643), and correctly fails on incompatible columns (#1641). `mutate()` on grouped data no longer groups grouping attributes (#1120). `rowwise()` mutate gives expected results (#1381). * `one_of()` tolerates unknown variables in `vars`, but warns (#1848, @jennybc). * `print.grouped_df()` passes on `...` to `print()` (#1893). * `slice()` correctly handles grouped attributes (#1405). * `ungroup()` generic gains `...` (#922). ### Dual table verbs * `bind_cols()` matches the behaviour of `bind_rows()` and ignores `NULL` inputs (#1148). It also handles `POSIXct`s with integer base type (#1402). * `bind_rows()` handles 0-length named lists (#1515), promotes factors to characters (#1538), and warns when binding factor and character (#1485). `bind_rows()` is more flexible in the way it can accept data frames, lists, list of data frames, and list of lists (#1389). * `bind_rows()` rejects `POSIXlt` columns (#1875, @krlmlr). * Both `bind_cols()` and `bind_rows()` infer classes and grouping information from the first data frame (#1692). * `rbind()` and `cbind()` get `grouped_df()` methods that make it harder to create corrupt data frames (#1385). You should still prefer `bind_rows()` and `bind_cols()`. * Joins now use correct class when joining on `POSIXct` columns (#1582, @joel23888), and consider time zones (#819). Joins handle a `by` that is empty (#1496), or has duplicates (#1192). Suffixes grow progressively to avoid creating repeated column names (#1460). Joins on string columns should be substantially faster (#1386). Extra attributes are ok if they are identical (#1636). Joins work correctly when factor levels are not equal (#1712, #1559). Anti- and semi-joins give correct result when by variable is a factor (#1571), but warn if factor levels are inconsistent (#2741). A clear error message is given for joins where an explicit `by` contains unavailable columns (#1928, #1932). 
Warnings about join column inconsistencies now contain the column names (#2728). * `inner_join()`, `left_join()`, `right_join()`, and `full_join()` gain a `suffix` argument which allows you to control what suffix duplicated variable names receive (#1296). * Set operations (`intersect()`, `union()` etc) respect coercion rules (#799). `setdiff()` handles factors with `NA` levels (#1526). * There were a number of fixes to enable joining of data frames that don't have the same encoding of column names (#1513), including working around bug 16885 regarding `match()` in R 3.3.0 (#1806, #1810, @krlmlr). ### Vector functions * `combine()` silently drops `NULL` inputs (#1596). * Hybrid `cummean()` is more stable against floating point errors (#1387). * Hybrid `lead()` and `lag()` received a considerable overhaul. They are more careful about more complicated expressions (#1588), and fall back more readily to pure R evaluation (#1411). They behave correctly in `summarise()` (#1434), and handle default values for string columns. * Hybrid `min()` and `max()` handle empty sets (#1481). * `n_distinct()` uses multiple arguments for data frames (#1084), falls back to R evaluation when needed (#1657), reverting decision made in (#567). Passing no arguments gives an error (#1957, #1959, @krlmlr). * `nth()` now supports negative indices to select from end, e.g. `nth(x, -2)` selects the 2nd value from the end of `x` (#1584). * `top_n()` can now also select bottom `n` values by passing a negative value to `n` (#1008, #1352). * Hybrid evaluation leaves formulas untouched (#1447). # dplyr 0.4.3 ## Improved encoding support Until now, dplyr's support for non-UTF8 encodings has been rather shaky. This release brings a number of improvements to fix these problems: it's probably not perfect, but should be a lot better than the previous version. This includes fixes to `arrange()` (#1280), `bind_rows()` (#1265), `distinct()` (#1179), and joins (#1315). 
`print.tbl_df()` also received a fix for strings with invalid encodings (#851). ## Other minor improvements and bug fixes * `frame_data()` provides a means for constructing `data_frame`s using a simple row-wise language. (#1358, @kevinushey) * `all.equal()` no longer runs all outputs together (#1130). * `as_data_frame()` gives better error message with NA column names (#1101). * `[.tbl_df` is more careful about subsetting column names (#1245). * `arrange()` and `mutate()` work on empty data frames (#1142). * `arrange()`, `filter()`, `slice()`, and `summarise()` preserve data frame meta attributes (#1064). * `bind_rows()` and `bind_cols()` accept lists (#1104): during initial data cleaning you no longer need to convert lists to data frames, but can instead feed them to `bind_rows()` directly. * `bind_rows()` gains a `.id` argument. When supplied, it creates a new column that gives the name of each data frame (#1337, @lionel-). * `bind_rows()` respects the `ordered` attribute of factors (#1112), and does better at comparing `POSIXct`s (#1125). The `tz` attribute is ignored when determining if two `POSIXct` vectors are comparable. If the `tz` of all inputs is the same, it's used, otherwise its set to `UTC`. * `data_frame()` always produces a `tbl_df` (#1151, @kevinushey) * `filter(x, TRUE, TRUE)` now just returns `x` (#1210), it doesn't internally modify the first argument (#971), and it now works with rowwise data (#1099). It once again works with data tables (#906). * `glimpse()` also prints out the number of variables in addition to the number of observations (@ilarischeinin, #988). * Joins handles matrix columns better (#1230), and can join `Date` objects with heterogeneous representations (some `Date`s are integers, while other are numeric). This also improves `all.equal()` (#1204). * Fixed `percent_rank()` and `cume_dist()` so that missing values no longer affect denominator (#1132). 
* `print.tbl_df()` now displays the class for all variables, not just those that don't fit on the screen (#1276). It also displays duplicated column names correctly (#1159). * `print.grouped_df()` now tells you how many groups there are. * `mutate()` can set to `NULL` the first column (used to segfault, #1329) and it better protects intermediary results (avoiding random segfaults, #1231). * `mutate()` on grouped data handles the special case where for the first few groups, the result consists of a `logical` vector with only `NA`. This can happen when the condition of an `ifelse` is an all `NA` logical vector (#958). * `mutate.rowwise_df()` handles factors (#886) and correctly handles 0-row inputs (#1300). * `n_distinct()` gains an `na_rm` argument (#1052). * The `Progress` bar used by `do()` now respects global option `dplyr.show_progress` (default is TRUE) so you can turn it off globally (@jimhester #1264, #1226). * `summarise()` handles expressions that return heterogeneous outputs, e.g. `median()`, which sometimes returns an integer, and other times a numeric (#893). * `slice()` silently drops columns corresponding to an NA (#1235). * `ungroup.rowwise_df()` gives a `tbl_df` (#936). * More explicit duplicated column name error message (#996). * When "," is already being used as the decimal point (`getOption("OutDec")`), use "." as the thousands separator when printing out formatted numbers (@ilarischeinin, #988). ## Databases * `db_query_fields.SQLiteConnection` uses `build_sql` rather than `paste0` (#926, @NikNakk) * Improved handling of `log()` (#1330). * `n_distinct(x)` is translated to `COUNT(DISTINCT(x))` (@skparkes, #873). * `print(n = Inf)` now works for remote sources (#1310). ## Hybrid evaluation * Hybrid evaluation does not take place for objects with a class (#1237). * Improved `$` handling (#1134). * Simplified code for `lead()` and `lag()` and make sure they work properly on factors (#955). Both respect the `default` argument (#915). 
* `mutate` can set to `NULL` the first column (used to segfault, #1329). * `filter` on grouped data handles indices correctly (#880). * `sum()` issues a warning about integer overflow (#1108). # dplyr 0.4.2 This is a minor release containing fixes for a number of crashes and issues identified by R CMD CHECK. There is one new "feature": dplyr no longer complains about unrecognised attributes, and instead just copies them over to the output. * `lag()` and `lead()` for grouped data were confused about indices and therefore produced wrong results (#925, #937). `lag()` once again overrides `lag()` instead of just the default method `lag.default()`. This is necessary due to changes in R CMD check. To use the lag function provided by another package, use `pkg::lag`. * Fixed a number of memory issues identified by valgrind. * Improved performance when working with large number of columns (#879). * Lists-cols that contain data frames now print a slightly nicer summary (#1147) * Set operations give more useful error message on incompatible data frames (#903). * `all.equal()` gives the correct result when `ignore_row_order` is `TRUE` (#1065) and `all.equal()` correctly handles character missing values (#1095). * `bind_cols()` always produces a `tbl_df` (#779). * `bind_rows()` gains a test for a form of data frame corruption (#1074). * `bind_rows()` and `summarise()` now handles complex columns (#933). * Workaround for using the constructor of `DataFrame` on an unprotected object (#998) * Improved performance when working with large number of columns (#879). # dplyr 0.4.1 * Don't assume that RPostgreSQL is available. # dplyr 0.4.0 ## New features * `add_rownames()` turns row names into an explicit variable (#639). * `as_data_frame()` efficiently coerces a list into a data frame (#749). * `bind_rows()` and `bind_cols()` efficiently bind a list of data frames by row or column. 
`combine()` applies the same coercion rules to vectors (it works like `c()` or `unlist()` but is consistent with the `bind_rows()` rules). * `right_join()` (include all rows in `y`, and matching rows in `x`) and `full_join()` (include all rows in `x` and `y`) complete the family of mutating joins (#96). * `group_indices()` computes a unique integer id for each group (#771). It can be called on a grouped_df without any arguments or on a data frame with same arguments as `group_by()`. ## New vignettes * `vignette("data_frames")` describes dplyr functions that make it easier and faster to create and coerce data frames. It subsumes the old `memory` vignette. * `vignette("two-table")` describes how two-table verbs work in dplyr. ## Minor improvements * `data_frame()` (and `as_data_frame()` & `tbl_df()`) now explicitly forbid columns that are data frames or matrices (#775). All columns must be either a 1d atomic vector or a 1d list. * `do()` uses lazyeval to correctly evaluate its arguments in the correct environment (#744), and new `do_()` is the SE equivalent of `do()` (#718). You can modify grouped data in place: this is probably a bad idea but it's sometimes convenient (#737). `do()` on grouped data tables now passes in all columns (not all columns except grouping vars) (#735, thanks to @kismsu). `do()` with database tables no longer potentially includes grouping variables twice (#673). Finally, `do()` gives more consistent outputs when there are no rows or no groups (#625). * `first()` and `last()` preserve factors, dates and times (#509). * Overhaul of single table verbs for data.table backend. They now all use a consistent (and simpler) code base. This ensures that (e.g.) `n()` now works in all verbs (#579). * In `*_join()`, you can now name only those variables that are different between the two tables, e.g. `inner_join(x, y, c("a", "b", "c" = "d"))` (#682). If non-join columns are the same, dplyr will add `.x` and `.y` suffixes to distinguish the source (#655). 
* `mutate()` handles complex vectors (#436) and forbids `POSIXlt` results (instead of crashing) (#670). * `select()` now implements a more sophisticated algorithm so if you're doing multiple includes and excludes with and without names, you're more likely to get what you expect (#644). You'll also get a better error message if you supply an input that doesn't resolve to an integer column position (#643). * Printing has received a number of small tweaks. All `print()` methods invisibly return their input so you can interleave `print()` statements into a pipeline to see interim results. `print()` will print column names of 0 row data frames (#652), and will never print more than 20 rows (i.e. `options(dplyr.print_max)` is now 20), not 100 (#710). Row names are now never printed since no dplyr method is guaranteed to preserve them (#669). `glimpse()` prints the number of observations (#692) `type_sum()` gains a data frame method. * `summarise()` handles list output columns (#832) * `slice()` works for data tables (#717). Documentation clarifies that slice can't work with relational databases, and the examples show how to achieve the same results using `filter()` (#720). * dplyr now requires RSQLite >= 1.0. This shouldn't affect your code in any way (except that RSQLite now doesn't need to be attached) but does simplify the internals (#622). * Functions that need to combine multiple results into a single column (e.g. `join()`, `bind_rows()` and `summarise()`) are more careful about coercion. Joining factors with the same levels in the same order preserves the original levels (#675). Joining factors with non-identical levels generates a warning and coerces to character (#684). Joining a character to a factor (or vice versa) generates a warning and coerces to character. Avoid these warnings by ensuring your data is compatible before joining. `rbind_list()` will throw an error if you attempt to combine an integer and factor (#751). 
`rbind()`ing a column full of `NA`s is allowed and just collects the appropriate missing value for the column type being collected (#493). `summarise()` is more careful about `NA`, e.g. the decision on the result type will be delayed until the first non NA value is returned (#599). It will complain about loss of precision coercions, which can happen for expressions that return integers for some groups and a doubles for others (#599). * A number of functions gained new or improved hybrid handlers: `first()`, `last()`, `nth()` (#626), `lead()` & `lag()` (#683), `%in%` (#126). That means when you use these functions in a dplyr verb, we handle them in C++, rather than calling back to R, and hence improving performance. Hybrid `min_rank()` correctly handles `NaN` values (#726). Hybrid implementation of `nth()` falls back to R evaluation when `n` is not a length one integer or numeric, e.g. when it's an expression (#734). Hybrid `dense_rank()`, `min_rank()`, `cume_dist()`, `ntile()`, `row_number()` and `percent_rank()` now preserve NAs (#774) * `filter` returns its input when it has no rows or no columns (#782). * Join functions keep attributes (e.g. time zone information) from the left argument for `POSIXct` and `Date` objects (#819), and only only warn once about each incompatibility (#798). ## Bug fixes * `[.tbl_df` correctly computes row names for 0-column data frames, avoiding problems with xtable (#656). `[.grouped_df` will silently drop grouping if you don't include the grouping columns (#733). * `data_frame()` now acts correctly if the first argument is a vector to be recycled. (#680 thanks @jimhester) * `filter.data.table()` works if the table has a variable called "V1" (#615). * `*_join()` keeps columns in original order (#684). Joining a factor to a character vector doesn't segfault (#688). `*_join` functions can now deal with multiple encodings (#769), and correctly name results (#855). * `*_join.data.table()` works when data.table isn't attached (#786). 
* `group_by()` on a data table preserves original order of the rows (#623). `group_by()` supports variables with more than 39 characters thanks to a fix in lazyeval (#705). It gives meaningful error message when a variable is not found in the data frame (#716). * `grouped_df()` requires `vars` to be a list of symbols (#665). * `min(.,na.rm = TRUE)` works with `Date`s built on numeric vectors (#755). * `rename_()` generic gets missing `.dots` argument (#708). * `row_number()`, `min_rank()`, `percent_rank()`, `dense_rank()`, `ntile()` and `cume_dist()` handle data frames with 0 rows (#762). They all preserve missing values (#774). `row_number()` doesn't segfault when giving an external variable with the wrong number of variables (#781). * `group_indices` handles the edge case when there are no variables (#867). * Removed bogus `NAs introduced by coercion to integer range` on 32-bit Windows (#2708). # dplyr 0.3.0.1 * Fixed problem with test script on Windows. # dplyr 0.3 ## New functions * `between()` vector function efficiently determines if numeric values fall in a range, and is translated to special form for SQL (#503). * `count()` makes it even easier to do (weighted) counts (#358). * `data_frame()` by @kevinushey is a nicer way of creating data frames. It never coerces column types (no more `stringsAsFactors = FALSE`!), never munges column names, and never adds row names. You can use previously defined columns to compute new columns (#376). * `distinct()` returns distinct (unique) rows of a tbl (#97). Supply additional variables to return the first row for each unique combination of variables. * Set operations, `intersect()`, `union()` and `setdiff()` now have methods for data frames, data tables and SQL database tables (#93). They pass their arguments down to the base functions, which will ensure they raise errors if you pass in two many arguments. * Joins (e.g. 
`left_join()`, `inner_join()`, `semi_join()`, `anti_join()`) now allow you to join on different variables in `x` and `y` tables by supplying a named vector to `by`. For example, `by = c("a" = "b")` joins `x.a` to `y.b`. * `n_groups()` function tells you how many groups in a tbl. It returns 1 for ungrouped data. (#477) * `transmute()` works like `mutate()` but drops all variables that you didn't explicitly refer to (#302). * `rename()` makes it easy to rename variables - it works similarly to `select()` but it preserves columns that you didn't otherwise touch. * `slice()` allows you to selecting rows by position (#226). It includes positive integers, drops negative integers and you can use expression like `n()`. ## Programming with dplyr (non-standard evaluation) * You can now program with dplyr - every function that does non-standard evaluation (NSE) has a standard evaluation (SE) version ending in `_`. This is powered by the new lazyeval package which provides all the tools needed to implement NSE consistently and correctly. * See `vignette("nse")` for full details. * `regroup()` is deprecated. Please use the more flexible `group_by_()` instead. * `summarise_each_q()` and `mutate_each_q()` are deprecated. Please use `summarise_each_()` and `mutate_each_()` instead. * `funs_q` has been replaced with `funs_`. ## Removed and deprecated features * `%.%` has been deprecated: please use `%>%` instead. `chain()` is defunct. (#518) * `filter.numeric()` removed. Need to figure out how to reimplement with new lazy eval system. * The `Progress` refclass is no longer exported to avoid conflicts with shiny. Instead use `progress_estimated()` (#535). * `src_monetdb()` is now implemented in MonetDB.R, not dplyr. * `show_sql()` and `explain_sql()` and matching global options `dplyr.show_sql` and `dplyr.explain_sql` have been removed. Instead use `show_query()` and `explain()`. ## Minor improvements and bug fixes * Main verbs now have individual documentation pages (#519). 
* `%>%` is simply re-exported from magrittr, instead of creating a local copy (#496, thanks to @jimhester) * Examples now use `nycflights13` instead of `hflights` because it the variables have better names and there are a few interlinked tables (#562). `Lahman` and `nycflights13` are (once again) suggested packages. This means many examples will not work unless you explicitly install them with `install.packages(c("Lahman", "nycflights13"))` (#508). dplyr now depends on Lahman 3.0.1. A number of examples have been updated to reflect modified field names (#586). * `do()` now displays the progress bar only when used in interactive prompts and not when knitting (#428, @jimhester). * `glimpse()` now prints a trailing new line (#590). * `group_by()` has more consistent behaviour when grouping by constants: it creates a new column with that value (#410). It renames grouping variables (#410). The first argument is now `.data` so you can create new groups with name x (#534). * Now instead of overriding `lag()`, dplyr overrides `lag.default()`, which should avoid clobbering lag methods added by other packages. (#277). * `mutate(data, a = NULL)` removes the variable `a` from the returned dataset (#462). * `trunc_mat()` and hence `print.tbl_df()` and friends gets a `width` argument to control the default output width. Set `options(dplyr.width = Inf)` to always show all columns (#589). * `select()` gains `one_of()` selector: this allows you to select variables provided by a character vector (#396). It fails immediately if you give an empty pattern to `starts_with()`, `ends_with()`, `contains()` or `matches()` (#481, @leondutoit). Fixed buglet in `select()` so that you can now create variables called `val` (#564). * Switched from RC to R6. * `tally()` and `top_n()` work consistently: neither accidentally evaluates the `wt` param. (#426, @mnel) * `rename` handles grouped data (#640). 
## Minor improvements and bug fixes by backend ### Databases * Correct SQL generation for `paste()` when used with the collapse parameter targeting a Postgres database. (@rbdixon, #1357) * The db backend system has been completely overhauled in order to make it possible to add backends in other packages, and to support a much wider range of databases. See `vignette("new-sql-backend")` for instruction on how to create your own (#568). * `src_mysql()` gains a method for `explain()`. * When `mutate()` creates a new variable that uses a window function, automatically wrap the result in a subquery (#484). * Correct SQL generation for `first()` and `last()` (#531). * `order_by()` now works in conjunction with window functions in databases that support them. ### Data frames/`tbl_df` * All verbs now understand how to work with `difftime()` (#390) and `AsIs` (#453) objects. They all check that colnames are unique (#483), and are more robust when columns are not present (#348, #569, #600). * Hybrid evaluation bugs fixed: * Call substitution stopped too early when a sub expression contained a `$` (#502). * Handle `::` and `:::` (#412). * `cumany()` and `cumall()` properly handle `NA` (#408). * `nth()` now correctly preserve the class when using dates, times and factors (#509). * no longer substitutes within `order_by()` because `order_by()` needs to do its own NSE (#169). * `[.tbl_df` always returns a tbl_df (i.e. `drop = FALSE` is the default) (#587, #610). `[.grouped_df` preserves important output attributes (#398). * `arrange()` keeps the grouping structure of grouped data (#491, #605), and preserves input classes (#563). * `contains()` accidentally matched regular expressions, now it passes `fixed = TRUE` to `grep()` (#608). * `filter()` asserts all variables are white listed (#566). * `mutate()` makes a `rowwise_df` when given a `rowwise_df` (#463). * `rbind_all()` creates `tbl_df` objects instead of raw `data.frame`s. 
* If `select()` doesn't match any variables, it returns a 0-column data frame, instead of the original (#498). It no longer fails when if some columns are not named (#492) * `sample_n()` and `sample_frac()` methods for data.frames exported. (#405, @alyst) * A grouped data frame may have 0 groups (#486). Grouped df objects gain some basic validity checking, which should prevent some crashes related to corrupt `grouped_df` objects made by `rbind()` (#606). * More coherence when joining columns of compatible but different types, e.g. when joining a character vector and a factor (#455), or a numeric and integer (#450) * `mutate()` works for on zero-row grouped data frame, and with list columns (#555). * `LazySubset` was confused about input data size (#452). * Internal `n_distinct()` is stricter about its inputs: it requires one symbol which must be from the data frame (#567). * `rbind_*()` handle data frames with 0 rows (#597). They fill character vector columns with `NA` instead of blanks (#595). They work with list columns (#463). * Improved handling of encoding for column names (#636). * Improved handling of hybrid evaluation re $ and @ (#645). ### Data tables * Fix major omission in `tbl_dt()` and `grouped_dt()` methods - I was accidentally doing a deep copy on every result :( * `summarise()` and `group_by()` now retain over-allocation when working with data.tables (#475, @arunsrinivasan). * joining two data.tables now correctly dispatches to data table methods, and result is a data table (#470) ### Cubes * `summarise.tbl_cube()` works with single grouping variable (#480). # dplyr 0.2 ## Piping dplyr now imports `%>%` from magrittr (#330). I recommend that you use this instead of `%.%` because it is easier to type (since you can hold down the shift key) and is more flexible. With you `%>%`, you can control which argument on the RHS receives the LHS by using the pronoun `.`. 
This makes `%>%` more useful with base R functions because they don't always take the data frame as the first argument. For example you could pipe `mtcars` to `xtabs()` with: mtcars %>% xtabs( ~ cyl + vs, data = .) Thanks to @smbache for the excellent magrittr package. dplyr only provides `%>%` from magrittr, but it contains many other useful functions. To use them, load `magrittr` explicitly: `library(magrittr)`. For more details, see `vignette("magrittr")`. `%.%` will be deprecated in a future version of dplyr, but it won't happen for a while. I've also deprecated `chain()` to encourage a single style of dplyr usage: please use `%>%` instead. ## Do `do()` has been completely overhauled. There are now two ways to use it, either with multiple named arguments or a single unnamed arguments. `group_by()` + `do()` is equivalent to `plyr::dlply`, except it always returns a data frame. If you use named arguments, each argument becomes a list-variable in the output. A list-variable can contain any arbitrary R object so it's particularly well suited for storing models. library(dplyr) models <- mtcars %>% group_by(cyl) %>% do(lm = lm(mpg ~ wt, data = .)) models %>% summarise(rsq = summary(lm)$r.squared) If you use an unnamed argument, the result should be a data frame. This allows you to apply arbitrary functions to each group. mtcars %>% group_by(cyl) %>% do(head(., 1)) Note the use of the `.` pronoun to refer to the data in the current group. `do()` also has an automatic progress bar. It appears if the computation takes longer than 5 seconds and lets you know (approximately) how much longer the job will take to complete. ## New verbs dplyr 0.2 adds three new verbs: * `glimpse()` makes it possible to see all the columns in a tbl, displaying as much data for each variable as can be fit on a single line. * `sample_n()` randomly samples a fixed number of rows from a tbl; `sample_frac()` randomly samples a fixed fraction of rows. 
Only works for local data frames and data tables (#202). * `summarise_each()` and `mutate_each()` make it easy to apply one or more functions to multiple columns in a tbl (#178). ## Minor improvements * If you load plyr after dplyr, you'll get a message suggesting that you load plyr first (#347). * `as.tbl_cube()` gains a method for matrices (#359, @paulstaab) * `compute()` gains `temporary` argument so you can control whether the results are temporary or permanent (#382, @cpsievert) * `group_by()` now defaults to `add = FALSE` so that it sets the grouping variables rather than adding to the existing list. I think this is how most people expected `group_by` to work anyway, so it's unlikely to cause problems (#385). * Support for [MonetDB](http://www.monetdb.org) tables with `src_monetdb()` (#8, thanks to @hannesmuehleisen). * New vignettes: * `memory` vignette which discusses how dplyr minimises memory usage for local data frames (#198). * `new-sql-backend` vignette which discusses how to add a new SQL backend/source to dplyr. * `changes()` output more clearly distinguishes which columns were added or deleted. * `explain()` is now generic. * dplyr is more careful when setting the keys of data tables, so it never accidentally modifies an object that it doesn't own. It also avoids unnecessary key setting which negatively affected performance. (#193, #255). * `print()` methods for `tbl_df`, `tbl_dt` and `tbl_sql` gain `n` argument to control the number of rows printed (#362). They also works better when you have columns containing lists of complex objects. * `row_number()` can be called without arguments, in which case it returns the same as `1:n()` (#303). * `"comment"` attribute is allowed (white listed) as well as names (#346). * hybrid versions of `min`, `max`, `mean`, `var`, `sd` and `sum` handle the `na.rm` argument (#168). This should yield substantial performance improvements for those functions. 
* Special case for call to `arrange()` on a grouped data frame with no arguments. (#369) ## Bug fixes * Code adapted to Rcpp > 0.11.1 * internal `DataDots` class protects against missing variables in verbs (#314), including the case where `...` is missing. (#338) * `all.equal.data.frame` from base is no longer bypassed. We now have `all.equal.tbl_df` and `all.equal.tbl_dt` methods (#332). * `arrange()` correctly handles NA in numeric vectors (#331) and 0 row data frames (#289). * `copy_to.src_mysql()` now works on Windows (#323) * `*_join()` doesn't reorder column names (#324). * `rbind_all()` is stricter and only accepts list of data frames (#288) * `rbind_*` propagates time zone information for `POSIXct` columns (#298). * `rbind_*` is less strict about type promotion. The numeric `Collecter` allows collection of integer and logical vectors. The integer `Collecter` also collects logical values (#321). * internal `sum` correctly handles integer (under/over)flow (#308). * `summarise()` checks consistency of outputs (#300) and drops `names` attribute of output columns (#357). * join functions throw error instead of crashing when there are no common variables between the data frames, and also give a better error message when only one data frame has a by variable (#371). * `top_n()` returns `n` rows instead of `n - 1` (@leondutoit, #367). * SQL translation always evaluates subsetting operators (`$`, `[`, `[[`) locally. (#318). * `select()` now renames variables in remote sql tbls (#317) and implicitly adds grouping variables (#170). * internal `grouped_df_impl` function errors if there are no variables to group by (#398). * `n_distinct` did not treat NA correctly in the numeric case #384. * Some compiler warnings triggered by -Wall or -pedantic have been eliminated. * `group_by` only creates one group for NA (#401). * Hybrid evaluator did not evaluate expression in correct environment (#403). 
# dplyr 0.1.3 ## Bug fixes * `select()` actually renames columns in a data table (#284). * `rbind_all()` and `rbind_list()` now handle missing values in factors (#279). * SQL joins now work better if names duplicated in both x and y tables (#310). * Builds against Rcpp 0.11.1 * `select()` correctly works with the vars attribute (#309). * Internal code is stricter when deciding if a data frame is grouped (#308): this avoids a number of situations which previously caused problems. * More data frame joins work with missing values in keys (#306). # dplyr 0.1.2 ## New features * `select()` is substantially more powerful. You can use named arguments to rename existing variables, and new functions `starts_with()`, `ends_with()`, `contains()`, `matches()` and `num_range()` to select variables based on their names. It now also makes a shallow copy, substantially reducing its memory impact (#158, #172, #192, #232). * `summarize()` added as alias for `summarise()` for people from countries that don't spell things correctly ;) (#245) ## Bug fixes * `filter()` now fails when given anything other than a logical vector, and correctly handles missing values (#249). `filter.numeric()` proxies `stats::filter()` so you can continue to use `filter()` function with numeric inputs (#264). * `summarise()` correctly uses newly created variables (#259). * `mutate()` correctly propagates attributes (#265) and `mutate.data.frame()` correctly mutates the same variable repeatedly (#243). * `lead()` and `lag()` preserve attributes, so they now work with dates, times and factors (#166). * `n()` never accepts arguments (#223). * `row_number()` gives correct results (#227). * `rbind_all()` silently ignores data frames with 0 rows or 0 columns (#274). * `group_by()` orders the result (#242). It also checks that columns are of supported types (#233, #276). 
* The hybrid evaluator did not handle some expressions correctly, for example in `if(n() > 5) 1 else 2` the subexpression `n()` was not substituted correctly. It also correctly processes `$` (#278). * `arrange()` checks that all columns are of supported types (#266). It also handles list columns (#282). * Working towards Solaris compatibility. * Benchmarking vignette temporarily disabled due to microbenchmark problems reported by BDR. # dplyr 0.1.1 ## Improvements * new `location()` and `changes()` functions which provide more information about how data frames are stored in memory so that you can see what gets copied. * renamed `explain_tbl()` to `explain()` (#182). * `tally()` gains `sort` argument to sort output so highest counts come first (#173). * `ungroup.grouped_df()`, `tbl_df()`, `as.data.frame.tbl_df()` now only make shallow copies of their inputs (#191). * The `benchmark-baseball` vignette now contains fairer (including grouping times) comparisons with `data.table`. (#222) ## Bug fixes * `filter()` (#221) and `summarise()` (#194) correctly propagate attributes. * `summarise()` throws an error when asked to summarise an unknown variable instead of crashing (#208). * `group_by()` handles factors with missing values (#183). * `filter()` handles scalar results (#217) and better handles scoping, e.g. `filter(., variable)` where `variable` is defined in the function that calls `filter`. It also handles `T` and `F` as aliases to `TRUE` and `FALSE` if there are no `T` or `F` variables in the data or in the scope. * `select.grouped_df` fails when the grouping variables are not included in the selected variables (#170) * `all.equal.data.frame()` handles a corner case where the data frame has `NULL` names (#217) * `mutate()` gives informative error message on unsupported types (#179) * dplyr source package no longer includes pandas benchmark, reducing download size from 2.8 MB to 0.5 MB. 
dplyr/MD50000644000176200001440000006204414525714672011737 0ustar liggesusersffa95bf481e89f6c449fdfc52dcfb2e1 *DESCRIPTION 371bde6d0dbcacd6d7b0e203b5425671 *LICENSE 61dc69480c92fcfed18bf122d5dc458f *NAMESPACE dff112d4c7c2f08a57d78618cac185d5 *NEWS.md 89fb3a746f4cce247c8834ca97a170ad *R/across.R cba0589edad47ddde00a0cfc52f1fafc *R/all-equal.R 692cd8205e2c9b34ab7265b4610e48f6 *R/arrange.R 7ed9cdd72acac6298478d4d9d5f918c7 *R/bind-cols.R 4fe57e0fcc021ac4e037c37728e94ff2 *R/bind-rows.R 9680674be2e7fbbe474df6737de23077 *R/by.R f7adb0911fafb54ecb61f1f529c80f91 *R/case-match.R 6c0ec9a7bfeb62e0651ee92d26067f6b *R/case-when.R eee53af21fb750a5aa927dc138f6b0b0 *R/coalesce.R 0aa23834321ce1172bb6ff3880865cfe *R/colwise-arrange.R 15e3608a5e2f10a110c7d1f68eb74fe7 *R/colwise-distinct.R 7e55dc474902cdb83c1b36ec00b5b58c *R/colwise-filter.R caeb62cf2d7ebc0300cabfbe967a056b *R/colwise-funs.R 42a6b18f512ceb67a3a47be21e7902c5 *R/colwise-group-by.R 478553935426578fba7a5f2522734c30 *R/colwise-mutate.R 667f3485690d97b8c51e93afcd6a0708 *R/colwise-select.R 7a10f42cd11d870e2e861820766a1851 *R/colwise.R 689d6a8d8b0cdec20ba7d803ca87a746 *R/compat-dbplyr.R 16164284d5edf6cf1f91760d794e206f *R/compat-name-repair.R 15f3e1465b592d3a2c9578218bb22cb2 *R/compute-collect.R 86895553045fa05c895e5f49431676fa *R/conditions.R 2ee0184673021d6f5df3a24e995b5af4 *R/consecutive-id.R bf594e38b5843b8e316fa71fae5114ca *R/context.R 2cd9146afaf64bd50e52f613ed33e068 *R/copy-to.R 888513d25415767778e6132710d88f33 *R/count-tally.R 5b876ab8c3207fd6b7d12c64085d3f74 *R/data-bands.R ede7d17c19699ca08527aa57510e9796 *R/data-mask.R 3d5101b0c532799921a8278ca363f10c *R/data-starwars.R 494d13fd7d5bcc81f12c0b0c642bacf4 *R/data-storms.R 4a449f448b2fb6147dabc4b40a74ec20 *R/dbplyr.R d9112b8571ad97ca0afa4c30439ef247 *R/defunct.R c901caeb5574a1dcd14163de3f376a4b *R/deprec-combine.R 0f6a2052168237fc0b6c6133f57b82ec *R/deprec-context.R 915cafdc5f80caa7da61a1a9b217bb0a *R/deprec-dbi.R d3943fecff95d67bf0a8fc8c98d5b6ac *R/deprec-do.R 
b945c886ce708311e78c4991044dce1c *R/deprec-funs.R cc4cd688f89ef19dc0403739ce8d0972 *R/deprec-lazyeval.R 13b9c96b2517c457c94b6d678cb37602 *R/deprec-src-local.R 8678e870dddbd4a321488fea1a34bd95 *R/deprec-tibble.R c172e38993233c800f307356a1bd784f *R/desc.R 3c0715c8d06827285718b61e76e5962a *R/distinct.R 3c3e0e9d1442619923b330c4da63d8e5 *R/doc-methods.R d6802b61428b201738f7445cf52961fe *R/doc-params.R 15be552cf4dafce0dadf0fecc648d83d *R/dplyr.R afb8acbbed05a93882c6df31d50a74be *R/error.R d7e2741ed506693fd37e32e0c5a61925 *R/explain.R b952d2fbc1d1922706ac20658373db1d *R/filter.R a332b3ac7224676c924737a90677429a *R/funs.R e101d86386a9450e265de72a6f7bb8d4 *R/generics.R b34166143e7d10c768fbf3573333a624 *R/group-by.R d175835360262d7eda44798ddd8493e6 *R/group-data.R cdef7a1809729ad990435e0b5def4aec *R/group-map.R 72846404c8ac329a648419d5a808c9d2 *R/group-nest.R 8160c7ab3862e2d916a3c3040cb5b654 *R/group-split.R d78c2c28598c28cb83f819ca27a29820 *R/group-trim.R 59e62139d5fff5860d8b8004135aca44 *R/grouped-df.R 676f30e742728c20a4a6a9aaaa318718 *R/groups-with.R 7943a1b57098aac76061fbc04c6579d5 *R/if-else.R 4b7dd15cbf0c047fe7bc497f9e5f878c *R/import-standalone-lazyeval.R 8c5f08458cfd4e7f75a27924b2789c83 *R/import-standalone-obj-type.R 302d9aa7fe4217f36d09bdf2496b82ac *R/import-standalone-purrr.R 09c045dfb7be704d32aec050f68a17a3 *R/import-standalone-types-check.R a10d865f6d8d675614a476becac93c25 *R/join-by.R 663f98b4245df54bc582bdc78ca0603e *R/join-cols.R 0ebeab1a3aa19082288ea49228e27098 *R/join-common-by.R 42c832b8b0438c60083d417a7cd6aefd *R/join-cross.R 6402c097ff07de24cb7c5cc349b02571 *R/join-rows.R b2ab2ae6c3ca78200d77bcda90f7f1fc *R/join.R dfb9e66a0735db16500aa55c3a94683b *R/lead-lag.R 7c1ac1e29ec7348c4cd794951f22d873 *R/locale.R 76060bf3889cc65ff9257b683df160f9 *R/mutate.R e88a7e03664da18eb88b92840c0ba369 *R/n-distinct.R 42ec8285017ef013b202230fdff50017 *R/na-if.R 9f92869c84547da11884ad5523fc4697 *R/near.R 4389e39afab2aeaeba6bc7f5bdb7ca06 *R/nest-by.R 
bb394f8b7b1355a6b0c6884abd19d286 *R/nth-value.R 1b12fa5fc953c40eb6b4e0ac91e3b2a2 *R/order-by.R a038449e787ab02e5b8f56ca19988da7 *R/pick.R ba1678b2b88d529a0b4c5a4575fed0a4 *R/progress.R 8b9360dafe4fbb505ce6fd7e66018e7c *R/pull.R 69bc37c26a730068cb21ea325abf6949 *R/rank.R f85185836465b6e12d55a5fffb7a5ad8 *R/recode.R 9aab2581ff22019c5db573e06686f2e1 *R/reexport-magrittr.R 17d906204424e6a444215e4a28096649 *R/reexport-pillar.R a77255a9f80ab02feb7dabca4d829344 *R/reexport-tibble.R 80d1bbebe05f6a945aa3cd8e4df42045 *R/reframe.R 13ec51bc0f7d28730a9ece8d2414e860 *R/relocate.R c2509127b56aad36b2d4ac7cbbb5a666 *R/rename.R 90370d982b3034c90519b47739be9a54 *R/rows.R 7611d5974f7e94536d3fe17fcd26e8d9 *R/rowwise.R d21c768528e4527c43705e6c047d0515 *R/sample.R 0f24a357f8901f3ab4b97072c27ba58a *R/select-helpers.R 32991e2cda24d607794b82e434a06930 *R/select.R 7511f934c864cd2ea37ab4b26ced1cd5 *R/sets.R 5ba39d21c086c7f824f12500a3eb6241 *R/slice.R 77c8ec30f341182605264f405c61f5e1 *R/src-dbi.R c3b80d897d87e20f702dc2b1ed49f9b0 *R/src.R c3d4fbbfda38de9c8d025110cf49f85a *R/summarise.R ad974aa7c48f230c9c17097e17d45f66 *R/tbl.R 483f245e38cefd9051aa5d12b4b21899 *R/top-n.R c79e30ed1968d78b4e399ed439bddc75 *R/transmute.R 482c17e9bda3631c27dcd7d0b5282b1d *R/ts.R 4c23ccc79b0837ad8f647980d3afb384 *R/utils-format.R af7b631a3b93ef46ae7037db17c94a5b *R/utils-tidy-eval.R baaf972d5d963ba01cd6d7691d1341b6 *R/utils.R 6c63c2c466fb1417d0a332f87aef8ffa *R/vctrs.R 5b63c2e6a0589580029d17e56bfd7f24 *R/vec-case-match.R 67bde6c9bdecbcf96eda9b672737e600 *R/vec-case-when.R 1b2e2b1adc8494d2681940e9fafb5c41 *R/zzz.R 5ef138b9d72a74e8d05123e6b2bc7830 *README.md 5970a4d0d19d3b002ca221bdb617b91e *build/dplyr.pdf c36e76ce9f51b27459472b20261d5b8c *build/stage23.rdb dde52aa62da304e8c87bf65b95063740 *build/vignette.rds a79561c8013e7a7f3c23d509f4918bf8 *data/band_instruments.rda 3aa4b1478fc31219480e88c876c3aeed *data/band_instruments2.rda 4d44ad5e4198daccbd4227dca895750b *data/band_members.rda b7f0799100f2923b9a3e00b87e5db157 
*data/starwars.rda 6c52b1c842944c85e381b299aa7352f8 *data/storms.rda e91f30b848a747c9cb9c94b0c0c492c8 *inst/doc/base.R d05ab21a50c52118a742f9357fd9552d *inst/doc/base.Rmd af178180602eabac5bb5b28e767d9177 *inst/doc/base.html 3a77f6f09c00cf092b4f99f1299e6226 *inst/doc/colwise.R a6504b70bb56595da80ebdeacb3a4571 *inst/doc/colwise.Rmd 030d53dce6b8365affe53062bdacdb29 *inst/doc/colwise.html b27f1d5cd903bd6b1dc42ee5f3284248 *inst/doc/dplyr.R f2dccae5a13cc4e1d7c6f80b6d7bd67c *inst/doc/dplyr.Rmd 1cbb089611ba69fe8b521394f40e7169 *inst/doc/dplyr.html 7c9f649afc138fdd9961fab1a1c48511 *inst/doc/grouping.R 13adad66b3809dea3a6a5ff412f0763e *inst/doc/grouping.Rmd 8b87fbc57a2d3b9f10304c23459b9063 *inst/doc/grouping.html 6748002b790124a139476c97170a5102 *inst/doc/in-packages.R aef8e1d6c3de3710586366a8b3ea46f9 *inst/doc/in-packages.Rmd c2b4e5bcd9cac85c194d500346e2f85a *inst/doc/in-packages.html 89e58f52075147000042e2bf97285837 *inst/doc/programming.R 7755a76ddfed8b48450ff7663b01fd6f *inst/doc/programming.Rmd f2e4665ea5c7f2c6a98f716f3c9bc61d *inst/doc/programming.html 32fb28c9968edbd66d3ded9e659eafa1 *inst/doc/rowwise.R e7d21f3fe88c93594c939b38ac1c1205 *inst/doc/rowwise.Rmd 2b236b0719e9dfb920bcd0414f92c365 *inst/doc/rowwise.html 703c679de2fb9dc99a171c24ec974776 *inst/doc/two-table.R fdb7682a8f09c1976c19f30eaabc8133 *inst/doc/two-table.Rmd 077e9cb8d683bda448461354194f34f5 *inst/doc/two-table.html 000a0fd0738f55f98a3569fe3f558519 *inst/doc/window-functions.R 976537d75de5855dbab47dc63d638545 *inst/doc/window-functions.Rmd 918e48a1f96b6f0e4e404e4cb597ef12 *inst/doc/window-functions.html 4943c9c53ad3a81d1dc1a34c4d93b5bc *man/across.Rd 640fa7c9a5c2a5d89513ec3f0014a948 *man/add_rownames.Rd 5e5865f79f9151d43c2d1a26807fd077 *man/all_equal.Rd fc3ca5a449dfba1b9c181d703989edcf *man/all_vars.Rd 42e38fdc29182b807e24a529751ecb76 *man/args_by.Rd 6c28ef6f8dd793dc844116269a0ee565 *man/arrange.Rd d87293bdf9521af556b81c233f29aeaa *man/arrange_all.Rd c0986838d8faabf1cedeec484a840a65 *man/auto_copy.Rd 
1fe1b4696a46635cf0566543c167fcea *man/backend_dbplyr.Rd 95f8bf5158b02ad31c35c25101661326 *man/band_members.Rd 99f947e45c2399654efbf74c3c4ae7f7 *man/between.Rd 6452d27792ace197bd1debc150688a6f *man/bind_cols.Rd 0afa1ffc285d2834c5a41964dedb6107 *man/bind_rows.Rd 989e819e3e26c0cde712d71945344e42 *man/c_across.Rd 97f3386aa4a18386f13c77c5eb29178c *man/case_match.Rd 199630865bf5197cbefad995cabadf00 *man/case_when.Rd fd4543fb2f91b2d10c3f1c456c8928fe *man/check_dbplyr.Rd 6279802688e385b3f74546716e249d94 *man/coalesce.Rd b5605cf1ee65b932b89dda836ab643c2 *man/combine.Rd dde6e8ebe2d80016fa0e555593df4736 *man/common_by.Rd b22cc0696c058cd28c8c8df8dce89bf1 *man/compute.Rd 7612c6d799a89c0f596fb11f17be1f9f *man/consecutive_id.Rd cbc6180183cce5d7539e426acb0105b6 *man/context.Rd 015b3027d291b0e7432f9b35430e381b *man/copy_to.Rd 1dbde4ef580a073c5701d7df53b10372 *man/count.Rd 13dc8db480d1284208ab6597ebb9ff59 *man/cross_join.Rd 3aeecc5b0d5e319adc921604e3c5b364 *man/cumall.Rd 76411b2918f84780d3cdeec196a56939 *man/defunct.Rd ad2b164deec391787f59795a9c25e905 *man/deprec-context.Rd 3ae68d8672eb304430c1552403a10508 *man/desc.Rd f3844446fa59fedc153689579defecb0 *man/dim_desc.Rd 6c1fb597f5efb7dcba2bddc316629a67 *man/distinct.Rd f38d8386af87e6000c068ce6c122a092 *man/distinct_all.Rd 055d2630bd3939b6f515e310bdca8e61 *man/do.Rd 5052882b8ae27cff3040c553d4f99c92 *man/dplyr-locale.Rd b9aad8050f72309d5cbabc61e7fb8b5b *man/dplyr-package.Rd 416c7d806f2cfe83655513ace6aa312c *man/dplyr_by.Rd a8a41bb128dbb47187566c45bfdfca04 *man/dplyr_data_masking.Rd daa95cb13d7485484ca0f4b4212202c6 *man/dplyr_extending.Rd 77bad0ab5896994bc755cf7e4b404c75 *man/dplyr_tidy_select.Rd 1e3cdaf2358d8ed1dc3f41fbe499dc64 *man/explain.Rd cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 
952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 46de21252239c5a23d400eae83ec6b2d *man/figures/lifecycle-retired.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg 1c9275a703baa038ba079452e917859c *man/figures/logo.png 0c9656ec7c61de58567bb233c47f0a8c *man/filter-joins.Rd 0232fca18bad7a62542d3d4ff89653ff *man/filter.Rd a87bdbd545a96906e138a0cc5e1a4e51 *man/filter_all.Rd 4a981d03b2890607274edf8c2bb56b3c *man/funs.Rd f77c0049953b91ac184faca8c56ca0fd *man/glimpse.Rd 0b51e47279ad7439cef434c99f06a8f7 *man/group_by.Rd 01b24ed5df98fa76f3f68c042942b18d *man/group_by_all.Rd 316b661e2d8e739f9e0f7515468a7242 *man/group_by_drop_default.Rd 36d1a634e317cc7ab458f6f9811b196a *man/group_by_prepare.Rd 2063c4e6693f1df7a595a033dbd67061 *man/group_cols.Rd 9a8b579033db2444476fba77904dada2 *man/group_data.Rd 828c2fa8ccad77e6b6f53970fa2daff8 *man/group_map.Rd 6b5a0f77404697497a76d121eb50f3e7 *man/group_nest.Rd 9b4e18af9da99a24b743da8f5a210318 *man/group_split.Rd 1315864c88c80a3c4b9a51d0817b3662 *man/group_trim.Rd 10bb88f3a0e92972d8310247f42a5326 *man/grouped_df.Rd 3d7a441e6f61b0398e2ac9d36981fdb1 *man/ident.Rd 89bba3d0c1ef6aa09fd03819271fd5c8 *man/if_else.Rd 1d1139ec2bfe64b5a3513f21d292fe4d *man/join_by.Rd 73c4160301b17422973415f7246cea67 *man/last_dplyr_warnings.Rd 8c37d3bfe0b863bcf2da03803c60f539 *man/lead-lag.Rd baa3c40092060bcf679a21c3a3b80108 *man/make_tbl.Rd 8088a6c4fd5e2630e6eb0efff70a6ca9 *man/mutate-joins.Rd 0461b7fac765aebf4c542b6a7f82d209 *man/mutate.Rd 9e49f6a9abcf8ed4a33900f1db612c23 *man/mutate_all.Rd e5fc124fa2e4d735d686adfa459ee2ce *man/n_distinct.Rd cd0a10f6c2d8ff05ec44fc173a6c2781 *man/na_if.Rd 0c99aebdb1662bb9e9fc97334212cc4d *man/near.Rd 0839edf30c7c76b3f2f4e2628091f17a *man/nest_by.Rd 
0965dc37659996e774aa4441cc9d8916 *man/nest_join.Rd c63195ca8a28bc11dfbed073f3436264 *man/new_grouped_df.Rd 25b9ae8d908370b6b6c994a4514b69a9 *man/nth.Rd f5db068d8fa8477b3c54ed168301f45e *man/ntile.Rd 535b4dbee16f84764a5a889687d83159 *man/order_by.Rd 775253850568ce83bc89f3778a000b47 *man/percent_rank.Rd 06856f131b4e882e90b5d30e2bf40194 *man/pick.Rd 771a1b83656a2c73444c14f67231fc8b *man/progress_estimated.Rd ca30bc76cdcce294551cb69cbeb3c123 *man/pull.Rd ce68ec54f3176d925963beb749435e2c *man/recode.Rd 7b3a653c018c43b68b45af0e891406d7 *man/reexports.Rd f07d4f2a003c68197074d1ae77d78828 *man/reframe.Rd 8b588aa430fee00fe485960e0eec1e36 *man/relocate.Rd 94aefe6bda32757e67cb70763fb0d4e0 *man/rename.Rd 222c325c44063b8a40a5e66f6660e57a *man/rmd/by.Rmd b08113cbec5e246ced3dae5437c3aa35 *man/rmd/overview.Rmd 2763a9433a5f5e387b54d9797b5b2320 *man/rmd/select.Rmd f8f7d758a0697bfc9be28c59139ac363 *man/row_number.Rd 2868c2097b7a287618f1a6bad0ab8e33 *man/rows.Rd 5713ee639dd278b629a2b1589c0b3c47 *man/rowwise.Rd 988ce8f1ec9a57ceca65bb3454cc1d20 *man/same_src.Rd 07d7cb164ad6e346243c7379b8fd1f07 *man/sample_n.Rd 51c5d5f440f86ff70438da9ef9ff671e *man/scoped.Rd d37085be047fc39665b226ee512ab6cc *man/se-deprecated.Rd 7c03c32a4f0d6369988f578d6db70362 *man/select.Rd a70159a7e6d4bee3052cd5c09f5fc009 *man/select_all.Rd 1438b8a19e545dcc119495bae06148d3 *man/setops.Rd 81c4b4e747bf7dfbb4c530e650a3d221 *man/slice.Rd 58452b35baf6118d7567b5cc0ce5aa9d *man/sql.Rd b095c446446ccec5ad5a71f7c67abb2e *man/src.Rd 40d73d3f5218395dd9d11fcc6b620369 *man/src_dbi.Rd 7ee274017d06ade58d410e5071d2af7c *man/src_local.Rd aa497adc8e0bf111028008b2c9fbfc2d *man/src_tbls.Rd 34225333bb8746a5cc36d133da79382d *man/starwars.Rd b55cf152609311b5aec1c266dbc6b535 *man/storms.Rd a7081d5fff5c33c19809259367886100 *man/summarise.Rd aa47c6ce601607d2cdef51da9e3282f3 *man/summarise_all.Rd f39c50448dc8884049f5c2e0c3fe29c9 *man/summarise_each.Rd c116f7273062b1cea8dbb10ecb2ebfd0 *man/tbl.Rd 12a9390dc42e26bf24c697a7fef007b2 *man/tbl_df.Rd 
ff0579184a0570e6393a9487639a9bc8 *man/tbl_ptype.Rd 12b8d60664b65b89bdabf47c859bce32 *man/tbl_vars.Rd c2e6c2efbbf4d3935f37a736a4bff145 *man/tidyeval-compat.Rd bd7658ac2bdd0fad530681f681e3859b *man/top_n.Rd 248651e2177fb50827b58603ab1ede0f *man/transmute.Rd 7ec42f01d5daceaf0c324cdd3e4001c6 *man/vars.Rd 4507933dd912f9b0e3a28e7d936fc883 *man/with_groups.Rd 0f716a00c40a985424f71afd4d758a80 *man/with_order.Rd 9d62a118b863491d383c9cee449d0fe6 *src/chop.cpp 93e101d223d45bf68ac2e834f9787141 *src/dplyr.h ac050c3972d9569cae34bf3d2165bbfd *src/filter.cpp 61d1b344819372c36879d75d4c0ff591 *src/funs.cpp d4f6d9e598ef77ecd7dedad5233f50cc *src/group_by.cpp 67ccfa80646c08b438ebf67a38667549 *src/group_data.cpp aa0e4ee0308cb60089f8ea43c39b1906 *src/imports.cpp f4f8031c0f765f3bb1bc949ad500aa4c *src/init.cpp f2c45d3bd47165137ec4d0b226789108 *src/mask.cpp c7355467bfd2895af9e6f7db98efbff3 *src/mutate.cpp 50dc89a7db4c0fc1f4d5d82b896ba262 *src/reconstruct.cpp e677f0014a3e2b0c0fe9746e91268859 *src/slice.cpp 39c1abc378d4c3dd38679489dde7099f *src/summarise.cpp 60c24a9c9c03f728e0d81d86fa6ca4d0 *tests/testthat.R 11f2e3cfe1d3de10e20354e1d43719c1 *tests/testthat/_snaps/across.md 239b530d0c8ec531136121ffc1279f97 *tests/testthat/_snaps/all-equal.md 22eb02c0887662677b70a71994ba149e *tests/testthat/_snaps/arrange.md 53712b17e76e8e65a58039553f709a1b *tests/testthat/_snaps/bind-cols.md d4262e4f1d27bee8d3d12a123e0fa9de *tests/testthat/_snaps/bind-rows.md c3a9079f4cb850475c002b2e2bdc60d0 *tests/testthat/_snaps/by.md 57735b6a66e73828366c457572a9593c *tests/testthat/_snaps/case-match.md aaaed059b26600fae245466b4f98143a *tests/testthat/_snaps/case-when.md 1c4d1f408c9b76427932842887007a51 *tests/testthat/_snaps/coalesce.md a3be2f2082973753d912e3d4417e4360 *tests/testthat/_snaps/colwise-filter.md ff232ca67527db3f86b702a1926e52b2 *tests/testthat/_snaps/colwise-mutate.md 1874d9aa2bd2dcd9091c9e92c78da0be *tests/testthat/_snaps/colwise-select.md 344c594226a02e072933b23638c3f9b0 *tests/testthat/_snaps/colwise.md 
0a0d012525d3e60a49fffc2eaee73f77 *tests/testthat/_snaps/conditions.md 8dbfd127154e1eb0dd9147b5836dd115 *tests/testthat/_snaps/consecutive-id.md ef6207a499a8e561c3336e0748c69364 *tests/testthat/_snaps/context.md 6abc687e7ef86d1809fd19312ddd5864 *tests/testthat/_snaps/copy-to.md a00492a31fb1959d3754b69f8fdb9106 *tests/testthat/_snaps/count-tally.md 74a3263b05d59ffa2f1b23d39a5f72f2 *tests/testthat/_snaps/defunct.md 3602b51f2d1bc7f37e0c151047986552 *tests/testthat/_snaps/deprec-combine.md 7449d802b0e78933c843a3ca193c7908 *tests/testthat/_snaps/deprec-context.md 7ecc1a26663c4757024940bbe8db475f *tests/testthat/_snaps/deprec-dbi.md a09c55d7849612c738339b939aa20369 *tests/testthat/_snaps/deprec-do.md cc527570dc25d1a4ba00caec5002d8a1 *tests/testthat/_snaps/deprec-funs.md 2cdb0049e3d8364819b79de0982f1537 *tests/testthat/_snaps/deprec-lazyeval.md d5cd7d205f2cef83c7fd43d9435ad021 *tests/testthat/_snaps/deprec-src-local.md dd427c9bcf7101db74c552b33dbd7b72 *tests/testthat/_snaps/desc.md dcd656253643b938552f93be0a423815 *tests/testthat/_snaps/distinct.md 93ca7eef275a64f52178489fc7b54579 *tests/testthat/_snaps/filter.md ff804c0f544abe9807e76d2c3d1e9f74 *tests/testthat/_snaps/funs.md f221d94236359b9d8e33a2db2711e13d *tests/testthat/_snaps/group-by.md 82d81578ca3344e88a0250f1f2912dbe *tests/testthat/_snaps/group-map.md 12da86c055ea0f307a28ddc53c112fc6 *tests/testthat/_snaps/grouped-df.md d61899d295f54f7f5040748587015ae4 *tests/testthat/_snaps/if-else.md 4d778608e3fbccc770d45af80e2793c6 *tests/testthat/_snaps/join-by.md 72b0bc8a58f17c321637e811bf24baeb *tests/testthat/_snaps/join-cols.md dac9cfecd2c566cbee2400a970fc1a22 *tests/testthat/_snaps/join-cross.md 7904fdcdf6c50817431804eb1c2c3a76 *tests/testthat/_snaps/join-rows.md ceab7c96ab407163ac1982310fc0b30c *tests/testthat/_snaps/join.md 5901e95408737af188d285c14a7c9e9a *tests/testthat/_snaps/lead-lag.md 9e78dd65a3ff676417d63bbccdc97913 *tests/testthat/_snaps/locale.md 5744b97342ac35490285c8c0c6e48f65 *tests/testthat/_snaps/mutate.md 
c468ce0b59f44a8de83b0550e506878a *tests/testthat/_snaps/n-distinct.md 5219f13e012ddb5618f726fddb8c7141 *tests/testthat/_snaps/na-if.md f3439a230fcd619c24925eb2065a694e *tests/testthat/_snaps/nth-value.md 78ffbe99f6ff0a217abafa32a3c5e020 *tests/testthat/_snaps/order-by.md cd8bfb04421610bea2b93827c5b1e562 *tests/testthat/_snaps/pick.md 5a6cf7c95915ce5c018962f4bd3c4c9e *tests/testthat/_snaps/rank.md 0cdc925a344ef1e1772a87d643b526ba *tests/testthat/_snaps/recode.md 7ad69224d2e27b23a41aa11565ede869 *tests/testthat/_snaps/reframe.md bc055b59f8f0decc34e9e85f34801dae *tests/testthat/_snaps/relocate.md 6046982d2ca8766341f2b34a699aed5f *tests/testthat/_snaps/rename.md 0aba62069e295a9874e28996251995c9 *tests/testthat/_snaps/rows.md 53489be7738869efc66645feff3ae75e *tests/testthat/_snaps/rowwise.md 7761ef8edee95ad681a35db72f221369 *tests/testthat/_snaps/sample.md 6a220b4bd25a0c72d93548cafbd2d1be *tests/testthat/_snaps/select.md dca965b05cbfad13401d9e605ca021bb *tests/testthat/_snaps/sets.md d5a2f9e6a064e69ee817b3df501bb170 *tests/testthat/_snaps/slice.md 98393475b2f023d746191b917ee9a86d *tests/testthat/_snaps/summarise.md 72b2f086d5f8154b91f7817d7ac2c51d *tests/testthat/_snaps/top-n.md 07e61ca097e0511318a0a59f689fa458 *tests/testthat/_snaps/transmute.md fad2051196d6e865af17241bb051ff7c *tests/testthat/_snaps/vec-case-match.md bb25c717ae51dc1b68016830b3f66b3e *tests/testthat/_snaps/vec-case-when.md ce71038c4d0b3e45faf92a87c141938e *tests/testthat/helper-dplyr.R c6a7ffb3fd270a5a1839d2a9c593f772 *tests/testthat/helper-encoding.R 4469e3540ecfa38bc6eaa62f39118a8b *tests/testthat/helper-lazy.R 184b9a3701693b11c886455f587fd9d4 *tests/testthat/helper-pick.R 8b590c78c107292f9159e0cecae923c7 *tests/testthat/helper-s3.R 5c40bc3557d7e68c3f963b4f428f5c20 *tests/testthat/helper-torture.R 7e47de7514428e48225b8469ce87cb79 *tests/testthat/test-across.R 08d236eeae3536c6e8547d84ed985a24 *tests/testthat/test-all-equal.R 0ab76d0183eb72a0b8bd24fc144591e6 *tests/testthat/test-arrange.R 
6e3d39c0dd63d625012146536a945bca *tests/testthat/test-bind-cols.R b956ffac2e092bcf9d55c8f1e706d9f5 *tests/testthat/test-bind-rows.R 3a77f6be418406835f35687008522dd6 *tests/testthat/test-by.R a5000d9f7aae25070e50bf8493f41426 *tests/testthat/test-case-match.R fbdc2bddbf725381cba8791e8919125b *tests/testthat/test-case-when.R 58b7d48694bf27f8b619440589c8ac7d *tests/testthat/test-coalesce.R 58bbbb603cda22e5dd0422d0f2a36a66 *tests/testthat/test-colwise-arrange.R 60402bc767fae94d4af51b836e2245fa *tests/testthat/test-colwise-distinct.R 932202a3c8eeff54ff65cfba64c6036e *tests/testthat/test-colwise-filter.R 989caa75d2c5938cdb53e2cbae96e8cb *tests/testthat/test-colwise-funs.R 0db080a8efd026edb15b03c658ac5eb6 *tests/testthat/test-colwise-group-by.R 6f64f91750a8a0cba8e35fbb87215a34 *tests/testthat/test-colwise-mutate.R 2876198377d9e86f36b715abb0c58517 *tests/testthat/test-colwise-select.R 913282791a28f971ce50ef73bcd18b52 *tests/testthat/test-colwise.R 6634683a9ca5ed360f0326d9b3e23044 *tests/testthat/test-conditions.R 2d0baee20f63a6d778bd1a133a01d5ad *tests/testthat/test-consecutive-id.R 1c3675cd0b56f2fbb7750e8e2ad5d33f *tests/testthat/test-context.R ea3dac87c76870b942e3010ca1431b56 *tests/testthat/test-copy-to.R c08ab6dcbf9318e399029018285a8b82 *tests/testthat/test-count-tally.R d1c2c840623ceb71475176d35722a896 *tests/testthat/test-defunct.R 253b3da2a8f567393a348bcfa3bfe50a *tests/testthat/test-deprec-combine.R 4e1d5d8ce002b54c57e931d82aa39340 *tests/testthat/test-deprec-context.R 0c920c856e78563e8d68b5dea5cd8cd2 *tests/testthat/test-deprec-dbi.R 8e43477807e6ec95510ca1fab9ce64da *tests/testthat/test-deprec-do.R e4a6a64b73ebb9c3f0065e8b93fb9140 *tests/testthat/test-deprec-funs.R 3c0e75a64db3589dac16fdc11b1b610e *tests/testthat/test-deprec-lazyeval.R de87f0d053dc3defe5217638a3d4448c *tests/testthat/test-deprec-src-local.R f82f247adb2819171a667ace2d2990bb *tests/testthat/test-deprec-tibble.R acf56d27f1cab0e1fa47a047d04152cf *tests/testthat/test-desc.R 
53a2100336ff57ebb3e92f0c0a819fcf *tests/testthat/test-distinct.R d8c9521d3ca98277ffcfd61d5871f76d *tests/testthat/test-filter.R 1bb5361d8e6e45324b09b8b70d3af105 *tests/testthat/test-funs.R 65d2e3df2a6b851d59e33cd1d6adc2d1 *tests/testthat/test-generics.R a9a92923ac744b11163898ff08ea5503 *tests/testthat/test-group-by.R 4ee5ca3f7215d78ecd4c99a427278a12 *tests/testthat/test-group-data.R a6b33b7229f1049f1c62fecf29214e86 *tests/testthat/test-group-map.R afcddb1575cf121b717c056aa23f142d *tests/testthat/test-group-nest.R ceea0873e618109dd9c5f52c6865b10a *tests/testthat/test-group-split.R 2d736d0b17b973cd95e969a8bb9749b2 *tests/testthat/test-group-trim.R 3d01f628e88a0ab8b033320e514f6e23 *tests/testthat/test-grouped-df.R 33822455c18b2d6e1bf09776dfb56b6e *tests/testthat/test-groups-with.R b1f96e8e55c2c0747eadacbd1e72dcc3 *tests/testthat/test-if-else.R bafa82cd3ace7c01629a117f5bf41b9c *tests/testthat/test-join-by.R 5f5bfaa64bbedc30777fa31904df9430 *tests/testthat/test-join-cols.R 0fb3994344728d1730595a39e5df7343 *tests/testthat/test-join-cross.R b111fd2dc49df4d90dc6711a9131dd0f *tests/testthat/test-join-rows.R 16026bd7b5346b802af450aa3deb4f24 *tests/testthat/test-join.R f3e804d92a55b709b4dc661599dcecb4 *tests/testthat/test-lead-lag.R c4a22c1e6abfe49a260695b99a0dec13 *tests/testthat/test-locale.R 64f214aa984fc99d53cfc46f24fa04b2 *tests/testthat/test-mutate.R 6db6e2391ed314adbebd6bade950e858 *tests/testthat/test-n-distinct.R 9328117973cb54c359402b2aa34df9ad *tests/testthat/test-na-if.R 5ea93280062e3dc5258f73736989706d *tests/testthat/test-near.R 40670af2057737b27c986c0a2e9ff058 *tests/testthat/test-nest-by.R d30af7da8091782855c6335dfe0e79db *tests/testthat/test-nth-value.R a416fa4012ef9ea4a677658bf537fe96 *tests/testthat/test-order-by.R 6559f0f794a9dd9825cb8931b410a230 *tests/testthat/test-pick.R 65c13e813a1ba14107e7b5e72e09ee0a *tests/testthat/test-pull.R dcbd9d5be00475cee6a8d7bd6f055df2 *tests/testthat/test-rank.R 2f2ce2e8ca455ff3d74250f979fdc696 *tests/testthat/test-recode.R 
8404869154400bc2ce5a219ff0e4a50a *tests/testthat/test-reframe.R 1d22aeac028b705da6b99155fc7e903c *tests/testthat/test-relocate.R 702d8651475a30066cfd1772959b2007 *tests/testthat/test-rename.R 0831b65d11dcdf9b98946f4c5efad2de *tests/testthat/test-rows.R 0a93e54fc977b848c8eadc430a87e990 *tests/testthat/test-rowwise.R 7f0d5c7c0a2b0c658a8bf2a753d91250 *tests/testthat/test-sample.R 1af7ca923551e1a810bd110200f7881d *tests/testthat/test-select-helpers.R c6fc99837c223818766c49400b79166a *tests/testthat/test-select.R 07b527aa51deb2be28f2365790a28d34 *tests/testthat/test-sets.R 7637c56adfc06c87f884db824b009efc *tests/testthat/test-slice.R db565b0d20e096ffb506dd27c7d20f55 *tests/testthat/test-src-dbi.R 873044283ad983672f0574f49dc73e3c *tests/testthat/test-summarise.R f97a14a5701b40eba1580202c66cad3b *tests/testthat/test-tbl.R faf2a4bdd26756be03ef12b98d55e45a *tests/testthat/test-top-n.R 83884b34f732168f5334d6634bf64cc5 *tests/testthat/test-transmute.R aa93134f23491b7e83f95d4d1e6a1a17 *tests/testthat/test-utils.R 37d2e6964796f2e6e4e405b9b9d20f53 *tests/testthat/test-vec-case-match.R 503c2a38b4b8062df4794f7c8cc85358 *tests/testthat/test-vec-case-when.R 86eea74dcbab50d52c262a4cb256a352 *tests/testthat/utf-8.txt d05ab21a50c52118a742f9357fd9552d *vignettes/base.Rmd a6504b70bb56595da80ebdeacb3a4571 *vignettes/colwise.Rmd 6c6c76cde452e1ac97aa6ef918e40cd2 *vignettes/compatibility.R 0d188192ab56d13d34be340aca9eadd5 *vignettes/compatibility.html f2dccae5a13cc4e1d7c6f80b6d7bd67c *vignettes/dplyr.Rmd 13adad66b3809dea3a6a5ff412f0763e *vignettes/grouping.Rmd aef8e1d6c3de3710586366a8b3ea46f9 *vignettes/in-packages.Rmd 7755a76ddfed8b48450ff7663b01fd6f *vignettes/programming.Rmd e7d21f3fe88c93594c939b38ac1c1205 *vignettes/rowwise.Rmd fdb7682a8f09c1976c19f30eaabc8133 *vignettes/two-table.Rmd 976537d75de5855dbab47dc63d638545 *vignettes/window-functions.Rmd dplyr/inst/0000755000176200001440000000000014525507110012361 5ustar 
liggesusersdplyr/inst/doc/0000755000176200001440000000000014525507110013126 5ustar liggesusersdplyr/inst/doc/colwise.R0000644000176200001440000001255014525507101014721 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ## ----eval = FALSE------------------------------------------------------------- # df %>% # group_by(g1, g2) %>% # summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d)) ## ----eval = FALSE------------------------------------------------------------- # df %>% # group_by(g1, g2) %>% # summarise(across(a:d, mean)) ## ----setup-------------------------------------------------------------------- library(dplyr, warn.conflicts = FALSE) ## ----------------------------------------------------------------------------- starwars %>% summarise(across(where(is.character), n_distinct)) starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) ## ----------------------------------------------------------------------------- df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) ## ----------------------------------------------------------------------------- min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) starwars %>% summarise(across(c(height, mass, birth_year), min_max)) ## ----------------------------------------------------------------------------- starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}")) ## 
----------------------------------------------------------------------------- starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ## ----------------------------------------------------------------------------- starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ) ## ----------------------------------------------------------------------------- starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% relocate(starts_with("min")) ## ----------------------------------------------------------------------------- df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) ## ----------------------------------------------------------------------------- df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) ## ----------------------------------------------------------------------------- df %>% summarise(across(where(is.numeric), sd), n = n()) ## ----------------------------------------------------------------------------- df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) ## ----------------------------------------------------------------------------- df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) ## ----------------------------------------------------------------------------- rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) ## ----------------------------------------------------------------------------- starwars %>% distinct(pick(contains("color"))) ## 
----------------------------------------------------------------------------- starwars %>% count(pick(contains("color")), sort = TRUE) ## ----------------------------------------------------------------------------- starwars %>% filter(if_any(everything(), ~ !is.na(.x))) ## ----------------------------------------------------------------------------- starwars %>% filter(if_all(everything(), ~ !is.na(.x))) ## ----eval = FALSE------------------------------------------------------------- # df %>% # group_by(g1, g2) %>% # summarise( # across(where(is.numeric), mean), # across(where(is.factor), nlevels), # n = n(), # ) ## ----results = FALSE---------------------------------------------------------- df %>% mutate_if(is.numeric, ~mean(.x, na.rm = TRUE)) # -> df %>% mutate(across(where(is.numeric), ~mean(.x, na.rm = TRUE))) df %>% mutate_at(vars(c(x, starts_with("y"))), mean) # -> df %>% mutate(across(c(x, starts_with("y")), mean)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) ## ----------------------------------------------------------------------------- df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1)) # Find all rows where EVERY numeric variable is greater than zero df %>% filter(if_all(where(is.numeric), ~ .x > 0)) # Find all rows where ANY numeric variable is greater than zero df %>% filter(if_any(where(is.numeric), ~ .x > 0)) ## ----------------------------------------------------------------------------- df <- tibble(x = 2, y = 4, z = 8) df %>% mutate_all(~ .x / y) df %>% mutate(across(everything(), ~ .x / y)) dplyr/inst/doc/rowwise.Rmd0000644000176200001440000003357114420040360015273 0ustar liggesusers--- title: "Row-wise operations" description: > In R, it's usually easier to do something for each column than for each row. In this vignette you will learn how to use the `rowwise()` function to perform operations by row. 
Along the way, you'll learn about list-columns, and see how you might perform simulations and modelling within dplyr verbs. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Row-wise operations} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` dplyr, and R in general, are particularly well suited to performing operations over columns, and performing operations over rows is much harder. In this vignette, you'll learn dplyr's approach centred around the row-wise data frame created by `rowwise()`. There are three common use cases that we discuss in this vignette: * Row-wise aggregates (e.g. compute the mean of x, y, z). * Calling a function multiple times with varying arguments. * Working with list-columns. These types of problems are often easily solved with a for loop, but it's nice to have a solution that fits naturally into a pipeline. > Of course, someone has to write loops. It doesn't have to be you. > --- Jenny Bryan ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ```{r include = FALSE} nest_by <- function(df, ...) { df %>% group_by(...) %>% summarise(data = list(pick(everything()))) %>% rowwise(...) } # mtcars %>% nest_by(cyl) ``` ## Creating Row-wise operations require a special type of grouping where each group consists of a single row. You create this with `rowwise()`: ```{r} df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() ``` Like `group_by()`, `rowwise()` doesn't really do anything itself; it just changes how the other verbs work. For example, compare the results of `mutate()` in the following code: ```{r} df %>% mutate(m = mean(c(x, y, z))) df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) ``` If you use `mutate()` with a regular data frame, it computes the mean of `x`, `y`, and `z` across all rows. If you apply it to a row-wise data frame, it computes the mean for each row. 
You can optionally supply "identifier" variables in your call to `rowwise()`. These variables are preserved when you call `summarise()`, so they behave somewhat similarly to the grouping variables passed to `group_by()`: ```{r} df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) ``` `rowwise()` is just a special form of grouping, so if you want to remove it from a data frame, just call `ungroup()`. ## Per row summary statistics `dplyr::summarise()` makes it really easy to summarise values across rows within one column. When combined with `rowwise()` it also makes it easy to summarise values across columns within one row. To see how, we'll start by making a little dataset: ```{r} df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df ``` Let's say we want compute the sum of `w`, `x`, `y`, and `z` for each row. We start by making a row-wise data frame: ```{r} rf <- df %>% rowwise(id) ``` We can then use `mutate()` to add a new column to each row, or `summarise()` to return just that one summary: ```{r} rf %>% mutate(total = sum(c(w, x, y, z))) rf %>% summarise(total = sum(c(w, x, y, z))) ``` Of course, if you have a lot of variables, it's going to be tedious to type in every variable name. Instead, you can use `c_across()` which uses tidy selection syntax so you can to succinctly select many variables: ```{r} rf %>% mutate(total = sum(c_across(w:z))) rf %>% mutate(total = sum(c_across(where(is.numeric)))) ``` You could combine this with column-wise operations (see `vignette("colwise")` for more details) to compute the proportion of the total for each column: ```{r} rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . / total)) ``` ### Row-wise summary functions The `rowwise()` approach will work for any summary function. 
But if you need greater speed, it's worth looking for a built-in row-wise variant of your summary function. These are more efficient because they operate on the data frame as whole; they don't split it into rows, compute the summary, and then join the results back together again. ```{r} df %>% mutate(total = rowSums(pick(where(is.numeric), -id))) df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id))) ``` **NB**: I use `df` (not `rf`) and `pick()` (not `c_across()`) here because `rowMeans()` and `rowSums()` take a multi-row data frame as input. Also note that `-id` is needed to avoid selecting `id` in `pick()`. This wasn't required with the rowwise data frame because we had specified `id` as an identifier in our original call to `rowwise()`, preventing it from being selected as a grouping column. ```{r, eval = FALSE, include = FALSE} bench::mark( df %>% mutate(m = rowSums(pick(x:z))), df %>% mutate(m = apply(pick(x:z), 1, sum)), df %>% rowwise() %>% mutate(m = sum(pick(x:z))), check = FALSE ) ``` ## List-columns `rowwise()` operations are a natural pairing when you have list-columns. They allow you to avoid explicit loops and/or functions from the `apply()` or `purrr::map()` families. ### Motivation Imagine you have this data frame, and you want to count the lengths of each element: ```{r} df <- tibble( x = list(1, 2:3, 4:6) ) ``` You might try calling `length()`: ```{r} df %>% mutate(l = length(x)) ``` But that returns the length of the column, not the length of the individual values. 
If you're an R documentation aficionado, you might know there's already a base R function just for this purpose: ```{r} df %>% mutate(l = lengths(x)) ``` Or if you're an experienced R programmer, you might know how to apply a function to each element of a list using `sapply()`, `vapply()`, or one of the purrr `map()` functions: ```{r} df %>% mutate(l = sapply(x, length)) df %>% mutate(l = purrr::map_int(x, length)) ``` But wouldn't it be nice if you could just write `length(x)` and dplyr would figure out that you wanted to compute the length of the element inside of `x`? Since you're here, you might already be guessing at the answer: this is just another application of the row-wise pattern. ```{r} df %>% rowwise() %>% mutate(l = length(x)) ``` ### Subsetting Before we continue on, I wanted to briefly mention the magic that makes this work. This isn't something you'll generally need to think about (it'll just work), but it's useful to know about when something goes wrong. There's an important difference between a grouped data frame where each group happens to have one row, and a row-wise data frame where every group always has one row. Take these two data frames: ```{r} df <- tibble(g = 1:2, y = list(1:3, "a")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) ``` If we compute some properties of `y`, you'll notice the results look different: ```{r} gf %>% mutate(type = typeof(y), length = length(y)) rf %>% mutate(type = typeof(y), length = length(y)) ``` They key difference is that when `mutate()` slices up the columns to pass to `length(y)` the grouped mutate uses `[` and the row-wise mutate uses `[[`. The following code gives a flavour of the differences if you used a for loop: ```{r} # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 ``` Note that this magic only applies when you're referring to existing columns, not when you're creating new rows. 
This is potentially confusing, but we're fairly confident it's the least worst solution, particularly given the hint in the error message. ```{r, error = TRUE} gf %>% mutate(y2 = y) rf %>% mutate(y2 = y) rf %>% mutate(y2 = list(y)) ``` ### Modelling `rowwise()` data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. We'll start by creating a nested data frame: ```{r} by_cyl <- mtcars %>% nest_by(cyl) by_cyl ``` This is a little different to the usual `group_by()` output: we have visibly changed the structure of the data. Now we have three rows (one for each group), and we have a list-col, `data`, that stores the data for that group. Also note that the output is `rowwise()`; this is important because it's going to make working with that list of data frames much easier. Once we have one data frame per row, it's straightforward to make one model per row: ```{r} mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods ``` And supplement that with one set of predictions per row: ```{r} mods <- mods %>% mutate(pred = list(predict(mod, data))) mods ``` You could then summarise the model in a variety of ways: ```{r} mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) mods %>% summarise(rsq = summary(mod)$r.squared) mods %>% summarise(broom::glance(mod)) ``` Or easily access the parameters of each model: ```{r} mods %>% reframe(broom::tidy(mod)) ``` ## Repeated function calls `rowwise()` doesn't just work with functions that return a length-1 vector (aka summary functions); it can work with any function if the result is a list. This means that `rowwise()` and `mutate()` provide an elegant way to call a function many times with varying arguments, storing the outputs alongside the inputs. ### Simulations I think this is a particularly elegant way to perform simulations, because it lets you store simulated values along with the parameters that generated them. 
For example, imagine you have the following data frame that describes the properties of 3 samples from the uniform distribution: ```{r} df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) ``` You can supply these parameters to `runif()` by using `rowwise()` and `mutate()`: ```{r} df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) ``` Note the use of `list()` here - `runif()` returns multiple values and a `mutate()` expression has to return something of length 1. `list()` means that we'll get a list column where each row is a list containing multiple values. If you forget to use `list()`, dplyr will give you a hint: ```{r, error = TRUE} df %>% rowwise() %>% mutate(data = runif(n, min, max)) ``` ### Multiple combinations What if you want to call a function for every combination of inputs? You can use `expand.grid()` (or `tidyr::expand_grid()`) to generate the data frame and then repeat the same pattern as above: ```{r} df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) ``` ### Varying functions In more complicated problems, you might also want to vary the function being called. This tends to be a bit more of an awkward fit with this approach because the columns in the input tibble will be less regular. 
But it's still possible, and it's a natural place to use `do.call()`: ```{r} df <- tribble( ~rng, ~params, "runif", list(n = 10), "rnorm", list(n = 20), "rpois", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) ``` ```{r, include = FALSE, eval = FALSE} df <- rowwise(tribble( ~rng, ~params, "runif", list(min = -1, max = 1), "rnorm", list(), "rpois", list(lambda = 5), )) # Has to happen in separate function to avoid eager unquoting f <- function(rng, params) purrr::exec(rng, n = 10, !!!params) df %>% mutate(data = list(f(rng, params))) ``` ## Previously ### `rowwise()` `rowwise()` was also questioning for quite some time, partly because I didn't appreciate how many people needed the native ability to compute summaries across multiple variables for each row. As an alternative, we recommended performing row-wise operations with the purrr `map()` functions. However, this was challenging because you needed to pick a map function based on the number of arguments that were varying and the type of result, which required quite some knowledge of purrr functions. I was also resistant to `rowwise()` because I felt like automatically switching between `[` to `[[` was too magical in the same way that automatically `list()`-ing results made `do()` too magical. I've now persuaded myself that the row-wise magic is good magic partly because most people find the distinction between `[` and `[[` mystifying and `rowwise()` means that you don't need to think about it. Since `rowwise()` clearly is useful it is not longer questioning, and we expect it to be around for the long term. ### `do()` We've questioned the need for `do()` for quite some time, because it never felt very similar to the other dplyr verbs. It had two main modes of operation: * Without argument names: you could call functions that input and output data frames using `.` to refer to the "current" group. 
For example, the following code gets the first row of each group: ```{r} mtcars %>% group_by(cyl) %>% do(head(., 1)) ``` This has been superseded by `pick()` plus `reframe()`, a variant of `summarise()` that can create multiple rows and columns per group. ```{r} mtcars %>% group_by(cyl) %>% reframe(head(pick(everything()), 1)) ``` * With arguments: it worked like `mutate()` but automatically wrapped every element in a list: ```{r} mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) ``` I now believe that behaviour is both too magical and not very useful, and it can be replaced by `summarise()` and `pick()`. ```{r} mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(pick(everything()))) ``` If needed (unlike here), you can wrap the results in a list yourself. The addition of `pick()`/`across()` and the increased scope of `summarise()`/`reframe()` means that `do()` is no longer needed, so it is now superseded. dplyr/inst/doc/window-functions.Rmd0000644000176200001440000002233614266276767017144 0ustar liggesusers--- title: "Window functions" description: > Window functions are a useful family of functions that work with vectors (returning an output the same size as the input), and combine naturally with `mutate()` and `filter()`. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Window functions} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) if (!rlang::is_installed("Lahman")) { knitr::opts_chunk$set(eval = FALSE) } ``` A __window function__ is a variation on an aggregation function. Where an aggregation function, like `sum()` and `mean()`, takes n inputs and return a single value, a window function returns n values. The output of a window function depends on all its input values, so window functions don't include functions that work element-wise, like `+` or `round()`. 
Window functions include variations on aggregate functions, like `cumsum()` and `cummean()`, functions for ranking and ordering, like `rank()`, and functions for taking offsets, like `lead()` and `lag()`. In this vignette, we'll use a small sample of the Lahman batting dataset, including the players that have won an award. ```{r} library(Lahman) batting <- Lahman::Batting %>% as_tibble() %>% select(playerID, yearID, teamID, G, AB:H) %>% arrange(playerID, yearID, teamID) %>% semi_join(Lahman::AwardsPlayers, by = "playerID") players <- batting %>% group_by(playerID) ``` Window functions are used in conjunction with `mutate()` and `filter()` to solve a wide range of problems. Here's a selection: ```{r, eval = FALSE} # For each player, find the two years with most hits filter(players, min_rank(desc(H)) <= 2 & H > 0) # Within each player, rank each year by the number of games played mutate(players, G_rank = min_rank(G)) # For each player, find every year that was better than the previous year filter(players, G > lag(G)) # For each player, compute avg change in games played per year mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID))) # For each player, find all years where they played more games than they did on average filter(players, G > mean(G)) # For each, player compute a z score based on number of games played mutate(players, G_z = (G - mean(G)) / sd(G)) ``` Before reading this vignette, you should be familiar with `mutate()` and `filter()`. ## Types of window functions There are five main families of window functions. Two families are unrelated to aggregation functions: * Ranking and ordering functions: `row_number()`, `min_rank()`, `dense_rank()`, `cume_dist()`, `percent_rank()`, and `ntile()`. These functions all take a vector to order by, and return various types of ranks. * Offsets `lead()` and `lag()` allow you to access the previous and next values in a vector, making it easy to compute differences and trends. 
The other three families are variations on familiar aggregate functions: * Cumulative aggregates: `cumsum()`, `cummin()`, `cummax()` (from base R), and `cumall()`, `cumany()`, and `cummean()` (from dplyr). * Rolling aggregates operate in a fixed width window. You won't find them in base R or in dplyr, but there are many implementations in other packages, such as [RcppRoll](https://cran.r-project.org/package=RcppRoll). * Recycled aggregates, where an aggregate is repeated to match the length of the input. These are not needed in R because vector recycling automatically recycles aggregates where needed. They are important in SQL, because the presence of an aggregation function usually tells the database to return only one row per group. Each family is described in more detail below, focussing on the general goals and how to use them with dplyr. For more details, refer to the individual function documentation. ## Ranking functions The ranking functions are variations on a theme, differing in how they handle ties: ```{r} x <- c(1, 1, 2, 2, 2) row_number(x) min_rank(x) dense_rank(x) ``` If you're familiar with R, you may recognise that `row_number()` and `min_rank()` can be computed with the base `rank()` function and various values of the `ties.method` argument. These functions are provided to save a little typing, and to make it easier to convert between R and SQL. Two other ranking functions return numbers between 0 and 1. `percent_rank()` gives the percentage of the rank; `cume_dist()` gives the proportion of values less than or equal to the current value. ```{r} cume_dist(x) percent_rank(x) ``` These are useful if you want to select (for example) the top 10% of records within each group. For example: ```{r} filter(players, cume_dist(desc(G)) < 0.1) ``` Finally, `ntile()` divides the data up into `n` evenly sized buckets. It's a coarse ranking, and it can be used in with `mutate()` to divide the data into buckets for further summary. 
For example, we could use `ntile()` to divide the players within a team into four ranked groups, and calculate the average number of games within each group. ```{r} by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) ``` All ranking functions rank from lowest to highest so that small input values get small ranks. Use `desc()` to rank from highest to lowest. ## Lead and lag `lead()` and `lag()` produce offset versions of a input vector that is either ahead of or behind the original vector. ```{r} x <- 1:5 lead(x) lag(x) ``` You can use them to: * Compute differences or percent changes. ```{r, results = "hide"} # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) ``` Using `lag()` is more convenient than `diff()` because for `n` inputs `diff()` returns `n - 1` outputs. * Find out when a value changes. ```{r, results = "hide"} # Find when a player changed teams filter(players, teamID != lag(teamID)) ``` `lead()` and `lag()` have an optional argument `order_by`. If set, instead of using the row order to determine which value comes before another, they will use another variable. This is important if you have not already sorted the data, or you want to sort one way and lag another. Here's a simple example of what happens if you don't specify `order_by` when you need it: ```{r} df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) ``` ## Cumulative aggregates Base R provides cumulative sum (`cumsum()`), cumulative min (`cummin()`), and cumulative max (`cummax()`). (It also provides `cumprod()` but that is rarely useful). 
Other common accumulating functions are `cumany()` and `cumall()`, cumulative versions of `||` and `&&`, and `cummean()`, a cumulative mean. These are not included in base R, but efficient versions are provided by `dplyr`. `cumany()` and `cumall()` are useful for selecting all rows up to, or all rows after, a condition is true for the first (or last) time. For example, we can use `cumany()` to find all records for a player after they played a year with 150 games: ```{r, eval = FALSE} filter(players, cumany(G > 150)) ``` Like lead and lag, you may want to control the order in which the accumulation occurs. None of the built in functions have an `order_by` argument so `dplyr` provides a helper: `order_by()`. You give it the variable you want to order by, and then the call to the window function: ```{r} x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) ``` This function uses a bit of non-standard evaluation, so I wouldn't recommend using it inside another function; use the simpler but less concise `with_order()` instead. ## Recycled aggregates R's vector recycling makes it easy to select values that are higher or lower than a summary. I call this a recycled aggregate because the value of the aggregate is recycled to be the same length as the original vector. Recycled aggregates are useful if you want to find all records greater than the mean or less than the median: ```{r, eval = FALSE} filter(players, G > mean(G)) filter(players, G < median(G)) ``` While most SQL databases don't have an equivalent of `median()` or `quantile()`, when filtering you can achieve the same effect with `ntile()`. For example, `x > median(x)` is equivalent to `ntile(x, 2) == 2`; `x > quantile(x, 75)` is equivalent to `ntile(x, 100) > 75` or `ntile(x, 4) > 3`. 
```{r, eval = FALSE} filter(players, ntile(G, 2) == 2) ``` You can also use this idea to select the records with the highest (`x == max(x)`) or lowest value (`x == min(x)`) for a field, but the ranking functions give you more control over ties, and allow you to select any number of records. Recycled aggregates are also useful in conjunction with `mutate()`. For example, with the batting data, we could compute the "career year", the number of years a player has played since they entered the league: ```{r} mutate(players, career_year = yearID - min(yearID) + 1) ``` Or, as in the introductory example, we could compute a z-score: ```{r} mutate(players, G_z = (G - mean(G)) / sd(G)) ``` dplyr/inst/doc/rowwise.html0000644000176200001440000022537214525507105015532 0ustar liggesusers Row-wise operations

Row-wise operations

dplyr, and R in general, are particularly well suited to performing operations over columns, and performing operations over rows is much harder. In this vignette, you’ll learn dplyr’s approach centred around the row-wise data frame created by rowwise().

There are three common use cases that we discuss in this vignette:

  • Row-wise aggregates (e.g. compute the mean of x, y, z).
  • Calling a function multiple times with varying arguments.
  • Working with list-columns.

These types of problems are often easily solved with a for loop, but it’s nice to have a solution that fits naturally into a pipeline.

Of course, someone has to write loops. It doesn’t have to be you. — Jenny Bryan

library(dplyr, warn.conflicts = FALSE)

Creating

Row-wise operations require a special type of grouping where each group consists of a single row. You create this with rowwise():

df <- tibble(x = 1:2, y = 3:4, z = 5:6)
df %>% rowwise()
#> # A tibble: 2 × 3
#> # Rowwise: 
#>       x     y     z
#>   <int> <int> <int>
#> 1     1     3     5
#> 2     2     4     6

Like group_by(), rowwise() doesn’t really do anything itself; it just changes how the other verbs work. For example, compare the results of mutate() in the following code:

df %>% mutate(m = mean(c(x, y, z)))
#> # A tibble: 2 × 4
#>       x     y     z     m
#>   <int> <int> <int> <dbl>
#> 1     1     3     5   3.5
#> 2     2     4     6   3.5
df %>% rowwise() %>% mutate(m = mean(c(x, y, z)))
#> # A tibble: 2 × 4
#> # Rowwise: 
#>       x     y     z     m
#>   <int> <int> <int> <dbl>
#> 1     1     3     5     3
#> 2     2     4     6     4

If you use mutate() with a regular data frame, it computes the mean of x, y, and z across all rows. If you apply it to a row-wise data frame, it computes the mean for each row.

You can optionally supply “identifier” variables in your call to rowwise(). These variables are preserved when you call summarise(), so they behave somewhat similarly to the grouping variables passed to group_by():

df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6)

df %>% 
  rowwise() %>% 
  summarise(m = mean(c(x, y, z)))
#> # A tibble: 2 × 1
#>       m
#>   <dbl>
#> 1     3
#> 2     4

df %>% 
  rowwise(name) %>% 
  summarise(m = mean(c(x, y, z)))
#> `summarise()` has grouped output by 'name'. You can override using the
#> `.groups` argument.
#> # A tibble: 2 × 2
#> # Groups:   name [2]
#>   name       m
#>   <chr>  <dbl>
#> 1 Mara       3
#> 2 Hadley     4

rowwise() is just a special form of grouping, so if you want to remove it from a data frame, just call ungroup().

Per row summary statistics

dplyr::summarise() makes it really easy to summarise values across rows within one column. When combined with rowwise() it also makes it easy to summarise values across columns within one row. To see how, we’ll start by making a little dataset:

df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45)
df
#> # A tibble: 6 × 5
#>      id     w     x     y     z
#>   <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40
#> 2     2    11    21    31    41
#> 3     3    12    22    32    42
#> 4     4    13    23    33    43
#> # ℹ 2 more rows

Let’s say we want compute the sum of w, x, y, and z for each row. We start by making a row-wise data frame:

rf <- df %>% rowwise(id)

We can then use mutate() to add a new column to each row, or summarise() to return just that one summary:

rf %>% mutate(total = sum(c(w, x, y, z)))
#> # A tibble: 6 × 6
#> # Rowwise:  id
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40   100
#> 2     2    11    21    31    41   104
#> 3     3    12    22    32    42   108
#> 4     4    13    23    33    43   112
#> # ℹ 2 more rows
rf %>% summarise(total = sum(c(w, x, y, z)))
#> `summarise()` has grouped output by 'id'. You can override using the `.groups`
#> argument.
#> # A tibble: 6 × 2
#> # Groups:   id [6]
#>      id total
#>   <int> <int>
#> 1     1   100
#> 2     2   104
#> 3     3   108
#> 4     4   112
#> # ℹ 2 more rows

Of course, if you have a lot of variables, it’s going to be tedious to type in every variable name. Instead, you can use c_across() which uses tidy selection syntax so you can to succinctly select many variables:

rf %>% mutate(total = sum(c_across(w:z)))
#> # A tibble: 6 × 6
#> # Rowwise:  id
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40   100
#> 2     2    11    21    31    41   104
#> 3     3    12    22    32    42   108
#> 4     4    13    23    33    43   112
#> # ℹ 2 more rows
rf %>% mutate(total = sum(c_across(where(is.numeric))))
#> # A tibble: 6 × 6
#> # Rowwise:  id
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <int>
#> 1     1    10    20    30    40   100
#> 2     2    11    21    31    41   104
#> 3     3    12    22    32    42   108
#> 4     4    13    23    33    43   112
#> # ℹ 2 more rows

You could combine this with column-wise operations (see vignette("colwise") for more details) to compute the proportion of the total for each column:

rf %>% 
  mutate(total = sum(c_across(w:z))) %>% 
  ungroup() %>% 
  mutate(across(w:z, ~ . / total))
#> # A tibble: 6 × 6
#>      id     w     x     y     z total
#>   <int> <dbl> <dbl> <dbl> <dbl> <int>
#> 1     1 0.1   0.2   0.3   0.4     100
#> 2     2 0.106 0.202 0.298 0.394   104
#> 3     3 0.111 0.204 0.296 0.389   108
#> 4     4 0.116 0.205 0.295 0.384   112
#> # ℹ 2 more rows

Row-wise summary functions

The rowwise() approach will work for any summary function. But if you need greater speed, it’s worth looking for a built-in row-wise variant of your summary function. These are more efficient because they operate on the data frame as a whole; they don’t split it into rows, compute the summary, and then join the results back together again.

df %>% mutate(total = rowSums(pick(where(is.numeric), -id)))
#> # A tibble: 6 × 6
#>      id     w     x     y     z total
#>   <int> <int> <int> <int> <int> <dbl>
#> 1     1    10    20    30    40   100
#> 2     2    11    21    31    41   104
#> 3     3    12    22    32    42   108
#> 4     4    13    23    33    43   112
#> # ℹ 2 more rows
df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id)))
#> # A tibble: 6 × 6
#>      id     w     x     y     z  mean
#>   <int> <int> <int> <int> <int> <dbl>
#> 1     1    10    20    30    40    25
#> 2     2    11    21    31    41    26
#> 3     3    12    22    32    42    27
#> 4     4    13    23    33    43    28
#> # ℹ 2 more rows

NB: I use df (not rf) and pick() (not c_across()) here because rowMeans() and rowSums() take a multi-row data frame as input. Also note that -id is needed to avoid selecting id in pick(). This wasn’t required with the rowwise data frame because we had specified id as an identifier in our original call to rowwise(), preventing it from being selected as a grouping column.

List-columns

rowwise() operations are a natural pairing when you have list-columns. They allow you to avoid explicit loops and/or functions from the apply() or purrr::map() families.

Motivation

Imagine you have this data frame, and you want to count the lengths of each element:

df <- tibble(
  x = list(1, 2:3, 4:6)
)

You might try calling length():

df %>% mutate(l = length(x))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     3
#> 2 <int [2]>     3
#> 3 <int [3]>     3

But that returns the length of the column, not the length of the individual values. If you’re an R documentation aficionado, you might know there’s already a base R function just for this purpose:

df %>% mutate(l = lengths(x))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3

Or if you’re an experienced R programmer, you might know how to apply a function to each element of a list using sapply(), vapply(), or one of the purrr map() functions:

df %>% mutate(l = sapply(x, length))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3
df %>% mutate(l = purrr::map_int(x, length))
#> # A tibble: 3 × 2
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3

But wouldn’t it be nice if you could just write length(x) and dplyr would figure out that you wanted to compute the length of the element inside of x? Since you’re here, you might already be guessing at the answer: this is just another application of the row-wise pattern.

df %>% 
  rowwise() %>% 
  mutate(l = length(x))
#> # A tibble: 3 × 2
#> # Rowwise: 
#>   x             l
#>   <list>    <int>
#> 1 <dbl [1]>     1
#> 2 <int [2]>     2
#> 3 <int [3]>     3

Subsetting

Before we continue on, I wanted to briefly mention the magic that makes this work. This isn’t something you’ll generally need to think about (it’ll just work), but it’s useful to know about when something goes wrong.

There’s an important difference between a grouped data frame where each group happens to have one row, and a row-wise data frame where every group always has one row. Take these two data frames:

df <- tibble(g = 1:2, y = list(1:3, "a"))
gf <- df %>% group_by(g)
rf <- df %>% rowwise(g)

If we compute some properties of y, you’ll notice the results look different:

gf %>% mutate(type = typeof(y), length = length(y))
#> # A tibble: 2 × 4
#> # Groups:   g [2]
#>       g y         type  length
#>   <int> <list>    <chr>  <int>
#> 1     1 <int [3]> list       1
#> 2     2 <chr [1]> list       1
rf %>% mutate(type = typeof(y), length = length(y))
#> # A tibble: 2 × 4
#> # Rowwise:  g
#>       g y         type      length
#>   <int> <list>    <chr>      <int>
#> 1     1 <int [3]> integer        3
#> 2     2 <chr [1]> character      1

The key difference is that when mutate() slices up the columns to pass to length(y) the grouped mutate uses [ and the row-wise mutate uses [[. The following code gives a flavour of the differences if you used a for loop:

# grouped
out1 <- integer(2)
for (i in 1:2) {
  out1[[i]] <- length(df$y[i])
}
out1
#> [1] 1 1

# rowwise
out2 <- integer(2)
for (i in 1:2) {
  out2[[i]] <- length(df$y[[i]])
}
out2
#> [1] 3 1

Note that this magic only applies when you’re referring to existing columns, not when you’re creating new rows. This is potentially confusing, but we’re fairly confident it’s the least worst solution, particularly given the hint in the error message.

gf %>% mutate(y2 = y)
#> # A tibble: 2 × 3
#> # Groups:   g [2]
#>       g y         y2       
#>   <int> <list>    <list>   
#> 1     1 <int [3]> <int [3]>
#> 2     2 <chr [1]> <chr [1]>
rf %>% mutate(y2 = y)
#> Error in `mutate()`:
#> ℹ In argument: `y2 = y`.
#> ℹ In row 1.
#> Caused by error:
#> ! `y2` must be size 1, not 3.
#> ℹ Did you mean: `y2 = list(y)` ?
rf %>% mutate(y2 = list(y))
#> # A tibble: 2 × 3
#> # Rowwise:  g
#>       g y         y2       
#>   <int> <list>    <list>   
#> 1     1 <int [3]> <int [3]>
#> 2     2 <chr [1]> <chr [1]>

Modelling

rowwise() data frames allow you to solve a variety of modelling problems in what I think is a particularly elegant way. We’ll start by creating a nested data frame:

by_cyl <- mtcars %>% nest_by(cyl)
by_cyl
#> # A tibble: 3 × 2
#> # Rowwise:  cyl
#>     cyl data              
#>   <dbl> <list>            
#> 1     4 <tibble [11 × 12]>
#> 2     6 <tibble [7 × 12]> 
#> 3     8 <tibble [14 × 12]>

This is a little different to the usual group_by() output: we have visibly changed the structure of the data. Now we have three rows (one for each group), and we have a list-col, data, that stores the data for that group. Also note that the output is rowwise(); this is important because it’s going to make working with that list of data frames much easier.

Once we have one data frame per row, it’s straightforward to make one model per row:

mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data)))
mods
#> # A tibble: 3 × 3
#> # Rowwise:  cyl
#>     cyl data               mod   
#>   <dbl> <list>             <list>
#> 1     4 <tibble [11 × 12]> <lm>  
#> 2     6 <tibble [7 × 12]>  <lm>  
#> 3     8 <tibble [14 × 12]> <lm>

And supplement that with one set of predictions per row:

mods <- mods %>% mutate(pred = list(predict(mod, data)))
mods
#> # A tibble: 3 × 4
#> # Rowwise:  cyl
#>     cyl data               mod    pred      
#>   <dbl> <list>             <list> <list>    
#> 1     4 <tibble [11 × 12]> <lm>   <dbl [11]>
#> 2     6 <tibble [7 × 12]>  <lm>   <dbl [7]> 
#> 3     8 <tibble [14 × 12]> <lm>   <dbl [14]>

You could then summarise the model in a variety of ways:

mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2)))
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
#> argument.
#> # A tibble: 3 × 2
#> # Groups:   cyl [3]
#>     cyl  rmse
#>   <dbl> <dbl>
#> 1     4 3.01 
#> 2     6 0.985
#> 3     8 1.87
mods %>% summarise(rsq = summary(mod)$r.squared)
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
#> argument.
#> # A tibble: 3 × 2
#> # Groups:   cyl [3]
#>     cyl   rsq
#>   <dbl> <dbl>
#> 1     4 0.509
#> 2     6 0.465
#> 3     8 0.423
mods %>% summarise(broom::glance(mod))
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
#> argument.
#> # A tibble: 3 × 13
#> # Groups:   cyl [3]
#>     cyl r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
#>   <dbl>     <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
#> 1     4     0.509         0.454  3.33      9.32  0.0137     1 -27.7   61.5  62.7
#> 2     6     0.465         0.357  1.17      4.34  0.0918     1  -9.83  25.7  25.5
#> 3     8     0.423         0.375  2.02      8.80  0.0118     1 -28.7   63.3  65.2
#> # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Or easily access the parameters of each model:

mods %>% reframe(broom::tidy(mod))
#> # A tibble: 6 × 6
#>     cyl term        estimate std.error statistic    p.value
#>   <dbl> <chr>          <dbl>     <dbl>     <dbl>      <dbl>
#> 1     4 (Intercept)    39.6       4.35      9.10 0.00000777
#> 2     4 wt             -5.65      1.85     -3.05 0.0137    
#> 3     6 (Intercept)    28.4       4.18      6.79 0.00105   
#> 4     6 wt             -2.78      1.33     -2.08 0.0918    
#> # ℹ 2 more rows

Repeated function calls

rowwise() doesn’t just work with functions that return a length-1 vector (aka summary functions); it can work with any function if the result is a list. This means that rowwise() and mutate() provide an elegant way to call a function many times with varying arguments, storing the outputs alongside the inputs.

Simulations

I think this is a particularly elegant way to perform simulations, because it lets you store simulated values along with the parameters that generated them. For example, imagine you have the following data frame that describes the properties of 3 samples from the uniform distribution:

df <- tribble(
  ~ n, ~ min, ~ max,
    1,     0,     1,
    2,    10,   100,
    3,   100,  1000,
)

You can supply these parameters to runif() by using rowwise() and mutate():

df %>% 
  rowwise() %>% 
  mutate(data = list(runif(n, min, max)))
#> # A tibble: 3 × 4
#> # Rowwise: 
#>       n   min   max data     
#>   <dbl> <dbl> <dbl> <list>   
#> 1     1     0     1 <dbl [1]>
#> 2     2    10   100 <dbl [2]>
#> 3     3   100  1000 <dbl [3]>

Note the use of list() here - runif() returns multiple values and a mutate() expression has to return something of length 1. list() means that we’ll get a list column where each row is a list containing multiple values. If you forget to use list(), dplyr will give you a hint:

df %>% 
  rowwise() %>% 
  mutate(data = runif(n, min, max))
#> Error in `mutate()`:
#> ℹ In argument: `data = runif(n, min, max)`.
#> ℹ In row 2.
#> Caused by error:
#> ! `data` must be size 1, not 2.
#> ℹ Did you mean: `data = list(runif(n, min, max))` ?

Multiple combinations

What if you want to call a function for every combination of inputs? You can use expand.grid() (or tidyr::expand_grid()) to generate the data frame and then repeat the same pattern as above:

df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100))

df %>% 
  rowwise() %>% 
  mutate(data = list(rnorm(10, mean, sd)))
#> # A tibble: 9 × 3
#> # Rowwise: 
#>    mean    sd data      
#>   <dbl> <dbl> <list>    
#> 1    -1     1 <dbl [10]>
#> 2     0     1 <dbl [10]>
#> 3     1     1 <dbl [10]>
#> 4    -1    10 <dbl [10]>
#> # ℹ 5 more rows

Varying functions

In more complicated problems, you might also want to vary the function being called. This tends to be a bit more of an awkward fit with this approach because the columns in the input tibble will be less regular. But it’s still possible, and it’s a natural place to use do.call():

df <- tribble(
   ~rng,     ~params,
   "runif",  list(n = 10), 
   "rnorm",  list(n = 20),
   "rpois",  list(n = 10, lambda = 5),
) %>%
  rowwise()

df %>% 
  mutate(data = list(do.call(rng, params)))
#> # A tibble: 3 × 3
#> # Rowwise: 
#>   rng   params           data      
#>   <chr> <list>           <list>    
#> 1 runif <named list [1]> <dbl [10]>
#> 2 rnorm <named list [1]> <dbl [20]>
#> 3 rpois <named list [2]> <int [10]>

Previously

rowwise()

rowwise() was also questioning for quite some time, partly because I didn’t appreciate how many people needed the native ability to compute summaries across multiple variables for each row. As an alternative, we recommended performing row-wise operations with the purrr map() functions. However, this was challenging because you needed to pick a map function based on the number of arguments that were varying and the type of result, which required quite some knowledge of purrr functions.

I was also resistant to rowwise() because I felt like automatically switching between [ and [[ was too magical in the same way that automatically list()-ing results made do() too magical. I’ve now persuaded myself that the row-wise magic is good magic partly because most people find the distinction between [ and [[ mystifying and rowwise() means that you don’t need to think about it.

Since rowwise() clearly is useful it is no longer questioning, and we expect it to be around for the long term.

do()

We’ve questioned the need for do() for quite some time, because it never felt very similar to the other dplyr verbs. It had two main modes of operation:

  • Without argument names: you could call functions that input and output data frames using . to refer to the “current” group. For example, the following code gets the first row of each group:

    mtcars %>% 
      group_by(cyl) %>% 
      do(head(., 1))
    #> # A tibble: 3 × 13
    #> # Groups:   cyl [3]
    #>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
    #>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
    #> 1  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1     8    16
    #> 2  21       6   160   110  3.9   2.62  16.5     0     1     4     4    12    24
    #> 3  18.7     8   360   175  3.15  3.44  17.0     0     0     3     2    16    32

    This has been superseded by pick() plus reframe(), a variant of summarise() that can create multiple rows and columns per group.

    mtcars %>% 
      group_by(cyl) %>% 
      reframe(head(pick(everything()), 1))
    #> # A tibble: 3 × 13
    #>     cyl   mpg  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
    #>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
    #> 1     4  22.8   108    93  3.85  2.32  18.6     1     1     4     1     8    16
    #> 2     6  21     160   110  3.9   2.62  16.5     0     1     4     4    12    24
    #> 3     8  18.7   360   175  3.15  3.44  17.0     0     0     3     2    16    32
  • With arguments: it worked like mutate() but automatically wrapped every element in a list:

    mtcars %>% 
      group_by(cyl) %>% 
      do(nrows = nrow(.))
    #> # A tibble: 3 × 2
    #> # Rowwise: 
    #>     cyl nrows    
    #>   <dbl> <list>   
    #> 1     4 <int [1]>
    #> 2     6 <int [1]>
    #> 3     8 <int [1]>

    I now believe that behaviour is both too magical and not very useful, and it can be replaced by summarise() and pick().

    mtcars %>% 
      group_by(cyl) %>% 
      summarise(nrows = nrow(pick(everything())))
    #> # A tibble: 3 × 2
    #>     cyl nrows
    #>   <dbl> <int>
    #> 1     4    11
    #> 2     6     7
    #> 3     8    14

    If needed (unlike here), you can wrap the results in a list yourself.

The addition of pick()/across() and the increased scope of summarise()/reframe() means that do() is no longer needed, so it is now superseded.

dplyr/inst/doc/base.Rmd0000644000176200001440000002744414406402754014525 0ustar liggesusers--- title: "dplyr <-> base R" output: rmarkdown::html_vignette description: > How does dplyr compare to base R? This vignette describes the main differences in philosophy, and shows the base R code most closely equivalent to each dplyr verb. vignette: > %\VignetteIndexEntry{dplyr <-> base R} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4, tibble.print_max = 4) ``` This vignette compares dplyr functions to their base R equivalents. This helps those familiar with base R understand better what dplyr does, and shows dplyr users how you might express the same ideas in base R code. We'll start with a rough overview of the major differences, then discuss the one table verbs in more detail, followed by the two table verbs. # Overview 1. The code dplyr verbs input and output data frames. This contrasts with base R functions which more frequently work with individual vectors. 1. dplyr relies heavily on "non-standard evaluation" so that you don't need to use `$` to refer to columns in the "current" data frame. This behaviour is inspired by the base functions `subset()` and `transform()`. 1. dplyr solutions tend to use a variety of single purpose verbs, while base R solutions typically tend to use `[` in a variety of ways, depending on the task at hand. 1. Multiple dplyr verbs are often strung together into a pipeline by `%>%`. In base R, you'll typically save intermediate results to a variable that you either discard, or repeatedly overwrite. 1. All dplyr verbs handle "grouped" data frames so that the code to perform a computation per-group looks very similar to code that works on a whole data frame. In base R, per-group operations tend to have varied forms. 
# One table verbs The following table shows a condensed translation between dplyr verbs and their base R equivalents. The following sections describe each operation in more detail. You'll learn more about the dplyr verbs in their documentation and in `vignette("dplyr")`. | dplyr | base | |------------------------------- |--------------------------------------------------| | `arrange(df, x)` | `df[order(x), , drop = FALSE]` | | `distinct(df, x)` | `df[!duplicated(x), , drop = FALSE]`, `unique()` | | `filter(df, x)` | `df[which(x), , drop = FALSE]`, `subset()` | | `mutate(df, z = x + y)` | `df$z <- df$x + df$y`, `transform()` | | `pull(df, 1)` | `df[[1]]` | | `pull(df, x)` | `df$x` | | `rename(df, y = x)` | `names(df)[names(df) == "x"] <- "y"` | | `relocate(df, y)` | `df[union("y", names(df))]` | | `select(df, x, y)` | `df[c("x", "y")]`, `subset()` | | `select(df, starts_with("x"))` | `df[grepl("^x", names(df))]` | | `summarise(df, mean(x))` | `mean(df$x)`, `tapply()`, `aggregate()`, `by()` | | `slice(df, c(1, 2, 5))` | `df[c(1, 2, 5), , drop = FALSE]` | To begin, we'll load dplyr and convert `mtcars` and `iris` to tibbles so that we can easily show only abbreviated output for each operation. ```{r setup, message = FALSE} library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris) ``` ## `arrange()`: Arrange rows by variables `dplyr::arrange()` orders the rows of a data frame by the values of one or more columns: ```{r} mtcars %>% arrange(cyl, disp) ``` The `desc()` helper allows you to order selected variables in descending order: ```{r} mtcars %>% arrange(desc(cyl), desc(disp)) ``` We can replicate in base R by using `[` with `order()`: ```{r} mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] ``` Note the use of `drop = FALSE`. If you forget this, and the input is a data frame with a single column, the output will be a vector, not a data frame. This is a source of subtle bugs. 
Base R does not provide a convenient and general way to sort individual variables in descending order, so you have two options: * For numeric variables, you can use `-x`. * You can request `order()` to sort all variables in descending order. ```{r, results = FALSE} mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE] ``` ## `distinct()`: Select distinct/unique rows `dplyr::distinct()` selects unique rows: ```{r} df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns df %>% distinct(x, .keep_all = TRUE) # whole data frame ``` There are two equivalents in base R, depending on whether you want the whole data frame, or just selected variables: ```{r} unique(df["x"]) # selected columns df[!duplicated(df$x), , drop = FALSE] # whole data frame ``` ## `filter()`: Return rows with matching conditions `dplyr::filter()` selects rows where an expression is `TRUE`: ```{r} starwars %>% filter(species == "Human") starwars %>% filter(mass > 1000) starwars %>% filter(hair_color == "none" & eye_color == "black") ``` The closest base equivalent (and the inspiration for `filter()`) is `subset()`: ```{r} subset(starwars, species == "Human") subset(starwars, mass > 1000) subset(starwars, hair_color == "none" & eye_color == "black") ``` You can also use `[` but this also requires the use of `which()` to remove `NA`s: ```{r} starwars[which(starwars$species == "Human"), , drop = FALSE] starwars[which(starwars$mass > 1000), , drop = FALSE] starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE] ``` ## `mutate()`: Create or transform variables `dplyr::mutate()` creates new variables from existing variables: ```{r} df %>% mutate(z = x + y, z2 = z ^ 2) ``` The closest base equivalent is `transform()`, but note that it cannot use freshly created variables: ```{r} head(transform(df, z = x + y, z2 = (x + y) ^ 2)) ``` 
Alternatively, you can use `$<-`: ```{r} mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 ``` When applied to a grouped data frame, `dplyr::mutate()` computes new variable once per group: ```{r} gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% mutate(x_mean = mean(x), x_rank = rank(x)) ``` To replicate this in base R, you can use `ave()`: ```{r} transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) ``` ## `pull()`: Pull out a single variable `dplyr::pull()` extracts a variable either by name or position: ```{r} mtcars %>% pull(1) mtcars %>% pull(cyl) ``` This equivalent to `[[` for positions and `$` for names: ```{r} mtcars[[1]] mtcars$cyl ``` ## `relocate()`: Change column order `dplyr::relocate()` makes it easy to move a set of columns to a new position (by default, the front): ```{r} # to front mtcars %>% relocate(gear, carb) # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) ``` We can replicate this in base R with a little set manipulation: ```{r} mtcars[union(c("gear", "carb"), names(mtcars))] to_back <- c("mpg", "cyl") mtcars[c(setdiff(names(mtcars), to_back), to_back)] ``` Moving columns to somewhere in the middle requires a little more set twiddling. 
## `rename()`: Rename variables by name `dplyr::rename()` allows you to rename variables by name or position: ```{r} iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) ``` Renaming variables by position is straight forward in base R: ```{r} iris2 <- iris names(iris2)[2] <- "sepal_width" ``` Renaming variables by name requires a bit more work: ```{r} names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length" ``` ## `rename_with()`: Rename variables with a function `dplyr::rename_with()` transform column names with a function: ```{r} iris %>% rename_with(toupper) ``` A similar effect can be achieved with `setNames()` in base R: ```{r} setNames(iris, toupper(names(iris))) ``` ## `select()`: Select variables by name `dplyr::select()` subsets columns by position, name, function of name, or other property: ```{r} iris %>% select(1:3) iris %>% select(Species, Sepal.Length) iris %>% select(starts_with("Petal")) iris %>% select(where(is.factor)) ``` Subsetting variables by position is straightforward in base R: ```{r} iris[1:3] # single argument selects columns; never drops iris[1:3, , drop = FALSE] ``` You have two options to subset by name: ```{r} iris[c("Species", "Sepal.Length")] subset(iris, select = c(Species, Sepal.Length)) ``` Subsetting by function of name requires a bit of work with `grep()`: ```{r} iris[grep("^Petal", names(iris))] ``` And you can use `Filter()` to subset by type: ```{r} Filter(is.factor, iris) ``` ## `summarise()`: Reduce multiple values down to a single value `dplyr::summarise()` computes one or more summaries for each group: ```{r} mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) ``` I think the closest base R equivalent uses `by()`. 
Unfortunately `by()` returns a list of data frames, but you can combine them back together again with `do.call()` and `rbind()`: ```{r} mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) ``` `aggregate()` comes very close to providing an elegant answer: ```{r} agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg ``` But unfortunately while it looks like there are `disp.mean` and `disp.n` columns, it's actually a single matrix column: ```{r} str(agg) ``` You can see a variety of other options at . ## `slice()`: Choose rows by position `slice()` selects rows with their location: ```{r} slice(mtcars, 25:n()) ``` This is straightforward to replicate with `[`: ```{r} mtcars[25:nrow(mtcars), , drop = FALSE] ``` # Two-table verbs When we want to merge two data frames, `x` and `y`), we have a variety of different ways to bring them together. Various base R `merge()` calls are replaced by a variety of dplyr `join()` functions. | dplyr | base | |------------------------|-----------------------------------------| | `inner_join(df1, df2)` |`merge(df1, df2)` | | `left_join(df1, df2) ` |`merge(df1, df2, all.x = TRUE)` | | `right_join(df1, df2)` |`merge(df1, df2, all.y = TRUE)` | | `full_join(df1, df2)` |`merge(df1, df2, all = TRUE)` | | `semi_join(df1, df2)` |`df1[df1$x %in% df2$x, , drop = FALSE]` | | `anti_join(df1, df2)` |`df1[!df1$x %in% df2$x, , drop = FALSE]` | For more information about two-table verbs, see `vignette("two-table")`. ### Mutating joins dplyr's `inner_join()`, `left_join()`, `right_join()`, and `full_join()` add new columns from `y` to `x`, matching rows based on a set of "keys", and differ only in how missing matches are handled. They are equivalent to calls to `merge()` with various settings of the `all`, `all.x`, and `all.y` arguments. 
The main difference is the order of the rows: * dplyr preserves the order of the `x` data frame. * `merge()` sorts the key columns. ### Filtering joins dplyr's `semi_join()` and `anti_join()` affect only the rows, not the columns: ```{r} band_members %>% semi_join(band_instruments) band_members %>% anti_join(band_instruments) ``` They can be replicated in base R with `[` and `%in%`: ```{r} band_members[band_members$name %in% band_instruments$name, , drop = FALSE] band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] ``` Semi and anti joins with multiple key variables are considerably more challenging to implement. dplyr/inst/doc/programming.R0000644000176200001440000001270514525507104015603 0ustar liggesusers## ----echo = FALSE, message = FALSE-------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ## ----setup, message = FALSE--------------------------------------------------- library(dplyr) ## ----results = FALSE---------------------------------------------------------- starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,] ## ----results = FALSE---------------------------------------------------------- starwars %>% filter(homeworld == "Naboo", species == "Human") ## ----------------------------------------------------------------------------- df <- data.frame(x = runif(3), y = runif(3)) df$x ## ----results = FALSE---------------------------------------------------------- var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) ## ----results = FALSE---------------------------------------------------------- for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ## ----------------------------------------------------------------------------- name <- "susan" tibble("{name}" := 2) ## 
----------------------------------------------------------------------------- my_df <- function(x) { tibble("{{x}}_2" := x * 2) } my_var <- 10 my_df(my_var) ## ----results = FALSE---------------------------------------------------------- summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) ## ----results = FALSE---------------------------------------------------------- vars <- c("mpg", "vs") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars)) ## ----------------------------------------------------------------------------- mutate_y <- function(data) { mutate(data, y = a + x) } ## ----------------------------------------------------------------------------- my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } ## ----------------------------------------------------------------------------- my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } ## ----------------------------------------------------------------------------- my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } ## ----------------------------------------------------------------------------- my_summarise4 <- function(data, expr) { data %>% summarise( "mean_{{expr}}" := mean({{ expr }}), "sum_{{expr}}" := sum({{ expr }}), "n_{{expr}}" := n() ) } my_summarise5 <- function(data, mean_var, sd_var) { data %>% summarise( "mean_{{mean_var}}" := mean({{ mean_var }}), "sd_{{sd_var}}" := sd({{ sd_var }}) ) } ## ----------------------------------------------------------------------------- my_summarise <- function(.data, ...) { .data %>% group_by(...) 
%>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) starwars %>% my_summarise(sex, gender) ## ----------------------------------------------------------------------------- quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs), quant = probs ) } x <- 1:5 quantile_df(x) ## ----------------------------------------------------------------------------- df <- tibble( grp = rep(1:3, each = 10), x = runif(30), y = rnorm(30) ) df %>% group_by(grp) %>% summarise(quantile_df(x, probs = .5)) df %>% group_by(grp) %>% summarise(across(x:y, ~ quantile_df(.x, probs = .5), .unpack = TRUE)) ## ----------------------------------------------------------------------------- df %>% group_by(grp) %>% reframe(across(x:y, quantile_df, .unpack = TRUE)) ## ----------------------------------------------------------------------------- my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) ## ----------------------------------------------------------------------------- my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } ## ----------------------------------------------------------------------------- my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}")) } ## ----results = FALSE---------------------------------------------------------- for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ## ----results = FALSE---------------------------------------------------------- mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]])) ## ----eval = FALSE------------------------------------------------------------- # library(shiny) # 
ui <- fluidPage( # selectInput("var", "Variable", choices = names(diamonds)), # tableOutput("output") # ) # server <- function(input, output, session) { # data <- reactive(filter(diamonds, .data[[input$var]] > 0)) # output$output <- renderTable(head(data())) # } dplyr/inst/doc/two-table.Rmd0000644000176200001440000001617414266276767015520 0ustar liggesusers--- title: "Two-table verbs" description: > Most dplyr verbs work with a single data set, but most data analyses involve multiple datasets. This vignette introduces you to the dplyr verbs that work with more than one data set, and introduces you to the mutating joins, filtering joins, and the set operations. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Two-table verbs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 5) library(dplyr) ``` It's rare that a data analysis involves only a single table of data. In practice, you'll normally have many tables that contribute to an analysis, and you need flexible tools to combine them. In dplyr, there are three families of verbs that work with two tables at a time: * Mutating joins, which add new variables to one table from matching rows in another. * Filtering joins, which filter observations from one table based on whether or not they match an observation in the other table. * Set operations, which combine the observations in the data sets as if they were set elements. (This discussion assumes that you have [tidy data](https://www.jstatsoft.org/v59/i10/), where the rows are observations and the columns are variables. If you're not familiar with that framework, I'd recommend reading up on it first.) All two-table verbs work similarly. The first two arguments are `x` and `y`, and provide the tables to combine. The output is always a new table with the same type as `x`. 
## Mutating joins Mutating joins allow you to combine variables from multiple tables. For example, consider the flights and airlines data from the nycflights13 package. In one table we have flight information with an abbreviation for carrier, and in another we have a mapping between abbreviations and full names. You can use a join to add the carrier names to the flight data: ```{r, warning = FALSE} library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) ``` ### Controlling how the tables are matched As well as `x` and `y`, each mutating join takes an argument `by` that controls which variables are used to match observations in the two tables. There are a few ways to specify it, as I illustrate below with various tables from nycflights13: * `NULL`, the default. dplyr will use all variables that appear in both tables, a __natural__ join. For example, the flights and weather tables match on their common variables: year, month, day, hour and origin. ```{r} flights2 %>% left_join(weather) ``` * A character vector, `by = "x"`. Like a natural join, but uses only some of the common variables. For example, `flights` and `planes` have `year` columns, but they mean different things so we only want to join by `tailnum`. ```{r} flights2 %>% left_join(planes, by = "tailnum") ``` Note that the year columns in the output are disambiguated with a suffix. * A named character vector: `by = c("x" = "a")`. This will match variable `x` in table `x` to variable `a` in table `y`. The variables from `x` will be used in the output. 
Each flight has an origin and destination `airport`, so we need to specify which one we want to join to: ```{r} flights2 %>% left_join(airports, c("dest" = "faa")) flights2 %>% left_join(airports, c("origin" = "faa")) ``` ### Types of join There are four types of mutating join, which differ in their behaviour when a match is not found. We'll illustrate each with a simple example: ```{r} df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = "a") ``` * `inner_join(x, y)` only includes observations that match in both `x` and `y`. ```{r} df1 %>% inner_join(df2) %>% knitr::kable() ``` * `left_join(x, y)` includes all observations in `x`, regardless of whether they match or not. This is the most commonly used join because it ensures that you don't lose observations from your primary table. ```{r} df1 %>% left_join(df2) ``` * `right_join(x, y)` includes all observations in `y`. It's equivalent to `left_join(y, x)`, but the columns and rows will be ordered differently. ```{r} df1 %>% right_join(df2) df2 %>% left_join(df1) ``` * `full_join()` includes all observations from `x` and `y`. ```{r} df1 %>% full_join(df2) ``` The left, right and full joins are collectively known as __outer joins__. When a row doesn't match in an outer join, the new variables are filled in with missing values. ### Observations While mutating joins are primarily used to add new variables, they can also generate new observations. If a match is not unique, a join will add all possible combinations (the Cartesian product) of the matching observations: ```{r} df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) df1 %>% left_join(df2) ``` ## Filtering joins Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. There are two types: * `semi_join(x, y)` __keeps__ all observations in `x` that have a match in `y`. 
* `anti_join(x, y)` __drops__ all observations in `x` that have a match in `y`. These are most useful for diagnosing join mismatches. For example, there are many flights in the nycflights13 dataset that don't have a matching tail number in the planes table: ```{r} library("nycflights13") flights %>% anti_join(planes, by = "tailnum") %>% count(tailnum, sort = TRUE) ``` If you're worried about what observations your joins will match, start with a `semi_join()` or `anti_join()`. `semi_join()` and `anti_join()` never duplicate; they only ever remove observations. ```{r} df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) # Four rows to start with: df1 %>% nrow() # And we get four rows after the join df1 %>% inner_join(df2, by = "x") %>% nrow() # But only two rows actually match df1 %>% semi_join(df2, by = "x") %>% nrow() ``` ## Set operations The final type of two-table verb is set operations. These expect the `x` and `y` inputs to have the same variables, and treat the observations like sets: * `intersect(x, y)`: return only observations in both `x` and `y` * `union(x, y)`: return unique observations in `x` and `y` * `setdiff(x, y)`: return observations in `x`, but not in `y`. Given this simple data: ```{r} (df1 <- tibble(x = 1:2, y = c(1L, 1L))) (df2 <- tibble(x = 1:2, y = 1:2)) ``` The four possibilities are: ```{r} intersect(df1, df2) # Note that we get 3 rows, not 4 union(df1, df2) setdiff(df1, df2) setdiff(df2, df1) ``` ## Multiple-table verbs dplyr does not provide any functions for working with three or more tables. Instead use `purrr::reduce()` or `Reduce()`, as described in [Advanced R](http://adv-r.had.co.nz/Functionals.html#functionals-fp), to iteratively combine the two-table verbs to handle as many tables as you need. 
dplyr/inst/doc/base.R0000644000176200001440000001403614525507100014166 0ustar liggesusers## ----echo = FALSE, message = FALSE-------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4, tibble.print_max = 4) ## ----setup, message = FALSE--------------------------------------------------- library(dplyr) mtcars <- as_tibble(mtcars) iris <- as_tibble(iris) ## ----------------------------------------------------------------------------- mtcars %>% arrange(cyl, disp) ## ----------------------------------------------------------------------------- mtcars %>% arrange(desc(cyl), desc(disp)) ## ----------------------------------------------------------------------------- mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE] ## ----results = FALSE---------------------------------------------------------- mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE] mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE] ## ----------------------------------------------------------------------------- df <- tibble( x = sample(10, 100, rep = TRUE), y = sample(10, 100, rep = TRUE) ) df %>% distinct(x) # selected columns df %>% distinct(x, .keep_all = TRUE) # whole data frame ## ----------------------------------------------------------------------------- unique(df["x"]) # selected columns df[!duplicated(df$x), , drop = FALSE] # whole data frame ## ----------------------------------------------------------------------------- starwars %>% filter(species == "Human") starwars %>% filter(mass > 1000) starwars %>% filter(hair_color == "none" & eye_color == "black") ## ----------------------------------------------------------------------------- subset(starwars, species == "Human") subset(starwars, mass > 1000) subset(starwars, hair_color == "none" & eye_color == "black") ## ----------------------------------------------------------------------------- starwars[which(starwars$species == "Human"), , drop = 
FALSE] starwars[which(starwars$mass > 1000), , drop = FALSE] starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE] ## ----------------------------------------------------------------------------- df %>% mutate(z = x + y, z2 = z ^ 2) ## ----------------------------------------------------------------------------- head(transform(df, z = x + y, z2 = (x + y) ^ 2)) ## ----------------------------------------------------------------------------- mtcars$cyl2 <- mtcars$cyl * 2 mtcars$cyl4 <- mtcars$cyl2 * 2 ## ----------------------------------------------------------------------------- gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5)) gf %>% group_by(g) %>% mutate(x_mean = mean(x), x_rank = rank(x)) ## ----------------------------------------------------------------------------- transform(gf, x_mean = ave(x, g, FUN = mean), x_rank = ave(x, g, FUN = rank) ) ## ----------------------------------------------------------------------------- mtcars %>% pull(1) mtcars %>% pull(cyl) ## ----------------------------------------------------------------------------- mtcars[[1]] mtcars$cyl ## ----------------------------------------------------------------------------- # to front mtcars %>% relocate(gear, carb) # to back mtcars %>% relocate(mpg, cyl, .after = last_col()) ## ----------------------------------------------------------------------------- mtcars[union(c("gear", "carb"), names(mtcars))] to_back <- c("mpg", "cyl") mtcars[c(setdiff(names(mtcars), to_back), to_back)] ## ----------------------------------------------------------------------------- iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2) ## ----------------------------------------------------------------------------- iris2 <- iris names(iris2)[2] <- "sepal_width" ## ----------------------------------------------------------------------------- names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length" ## 
----------------------------------------------------------------------------- iris %>% rename_with(toupper) ## ----------------------------------------------------------------------------- setNames(iris, toupper(names(iris))) ## ----------------------------------------------------------------------------- iris %>% select(1:3) iris %>% select(Species, Sepal.Length) iris %>% select(starts_with("Petal")) iris %>% select(where(is.factor)) ## ----------------------------------------------------------------------------- iris[1:3] # single argument selects columns; never drops iris[1:3, , drop = FALSE] ## ----------------------------------------------------------------------------- iris[c("Species", "Sepal.Length")] subset(iris, select = c(Species, Sepal.Length)) ## ----------------------------------------------------------------------------- iris[grep("^Petal", names(iris))] ## ----------------------------------------------------------------------------- Filter(is.factor, iris) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% summarise(mean = mean(disp), n = n()) ## ----------------------------------------------------------------------------- mtcars_by <- by(mtcars, mtcars$cyl, function(df) { with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df))) }) do.call(rbind, mtcars_by) ## ----------------------------------------------------------------------------- agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x))) agg ## ----------------------------------------------------------------------------- str(agg) ## ----------------------------------------------------------------------------- slice(mtcars, 25:n()) ## ----------------------------------------------------------------------------- mtcars[25:nrow(mtcars), , drop = FALSE] ## ----------------------------------------------------------------------------- band_members %>% semi_join(band_instruments) band_members %>% 
anti_join(band_instruments) ## ----------------------------------------------------------------------------- band_members[band_members$name %in% band_instruments$name, , drop = FALSE] band_members[!band_members$name %in% band_instruments$name, , drop = FALSE] dplyr/inst/doc/dplyr.html0000644000176200001440000024256514525507102015165 0ustar liggesusers Introduction to dplyr

Introduction to dplyr

When working with data you must:

  • Figure out what you want to do.

  • Describe those tasks in the form of a computer program.

  • Execute the program.

The dplyr package makes these steps fast and easy:

  • By constraining your options, it helps you think about your data manipulation challenges.

  • It provides simple “verbs”, functions that correspond to the most common data manipulation tasks, to help you translate your thoughts into code.

  • It uses efficient backends, so you spend less time waiting for the computer.

This document introduces you to dplyr’s basic set of tools, and shows you how to apply them to data frames. dplyr also supports databases via the dbplyr package; once you’ve installed it, read vignette("dbplyr") to learn more.

Data: starwars

To explore the basic data manipulation verbs of dplyr, we’ll use the dataset starwars. This dataset contains 87 characters, comes from the Star Wars API, and is documented in ?starwars.

dim(starwars)
#> [1] 87 14
starwars
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Note that starwars is a tibble, a modern reimagining of the data frame. It’s particularly useful for large datasets because it only prints the first few rows. You can learn more about tibbles at https://tibble.tidyverse.org; in particular you can convert data frames to tibbles with as_tibble().

Single table verbs

dplyr aims to provide a function for each basic verb of data manipulation. These verbs can be organised into three categories based on the component of the dataset that they work with:

  • Rows:
    • filter() chooses rows based on column values.
    • slice() chooses rows based on location.
    • arrange() changes the order of the rows.
  • Columns:
    • select() changes whether or not a column is included.
    • rename() changes the name of columns.
    • mutate() changes the values of columns and creates new columns.
    • relocate() changes the order of the columns.
  • Groups of rows:
    • summarise() collapses a group into a single row.

The pipe

All of the dplyr functions take a data frame (or tibble) as the first argument. Rather than forcing the user to either save intermediate objects or nest functions, dplyr provides the %>% operator from magrittr. x %>% f(y) turns into f(x, y) so the result from one step is then “piped” into the next step. You can use the pipe to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as “then”).

Filter rows with filter()

filter() allows you to select a subset of rows in a data frame. Like all single verbs, the first argument is the tibble (or data frame). The second and subsequent arguments refer to variables within that data frame, selecting rows where the expression is TRUE.

For example, we can select all characters with light skin color and brown eyes with:

starwars %>% filter(skin_color == "light", eye_color == "brown")
#> # A tibble: 7 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Leia Org…    150    49 brown      light      brown             19 fema… femin…
#> 2 Biggs Da…    183    84 black      light      brown             24 male  mascu…
#> 3 Padmé Am…    185    45 brown      light      brown             46 fema… femin…
#> 4 Cordé        157    NA brown      light      brown             NA <NA>  <NA>  
#> # ℹ 3 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

This is roughly equivalent to this base R code:

starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ]

Arrange rows with arrange()

arrange() works similarly to filter() except that instead of filtering or selecting rows, it reorders them. It takes a data frame, and a set of column names (or more complicated expressions) to order by. If you provide more than one column name, each additional column will be used to break ties in the values of preceding columns:

starwars %>% arrange(height, mass)
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Yoda          66    17 white      green      brown            896 male  mascu…
#> 2 Ratts Ty…     79    15 none       grey, blue unknown           NA male  mascu…
#> 3 Wicket S…     88    20 brown      brown      brown              8 male  mascu…
#> 4 Dud Bolt      94    45 none       blue, grey yellow            NA male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Use desc() to order a column in descending order:

starwars %>% arrange(desc(height))
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Yarael P…    264    NA none       white      yellow            NA male  mascu…
#> 2 Tarfful      234   136 brown      brown      blue              NA male  mascu…
#> 3 Lama Su      229    88 none       grey       black             NA male  mascu…
#> 4 Chewbacca    228   112 brown      unknown    blue             200 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Choose rows using their position with slice()

slice() lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows.

We can get characters from row numbers 5 through 10.

starwars %>% slice(5:10)
#> # A tibble: 6 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Leia Org…    150    49 brown      light      brown             19 fema… femin…
#> 2 Owen Lars    178   120 brown, gr… light      blue              52 male  mascu…
#> 3 Beru Whi…    165    75 brown      light      blue              47 fema… femin…
#> 4 R5-D4         97    32 <NA>       white, red red               NA none  mascu…
#> # ℹ 2 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

It is accompanied by a number of helpers for common use cases:

  • slice_head() and slice_tail() select the first or last rows.
starwars %>% slice_head(n = 3)
#> # A tibble: 3 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue              19 male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow           112 none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red               33 none  mascu…
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
  • slice_sample() randomly selects rows. Use the option prop to choose a certain proportion of the cases.
starwars %>% slice_sample(n = 5)
#> # A tibble: 5 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Ayla Sec…    178  55   none       blue       hazel             48 fema… femin…
#> 2 Bossk        190 113   none       green      red               53 male  mascu…
#> 3 San Hill     191  NA   none       grey       gold              NA male  mascu…
#> 4 Luminara…    170  56.2 black      yellow     blue              58 fema… femin…
#> # ℹ 1 more row
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
starwars %>% slice_sample(prop = 0.1)
#> # A tibble: 8 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Qui-Gon …    193    89 brown      fair       blue              92 male  mascu…
#> 2 Jango Fe…    183    79 black      tan        brown             66 male  mascu…
#> 3 Jocasta …    167    NA white      fair       blue              NA fema… femin…
#> 4 Zam Wese…    168    55 blonde     fair, gre… yellow            NA fema… femin…
#> # ℹ 4 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Use replace = TRUE to perform a bootstrap sample. If needed, you can weight the sample with the weight argument.

  • slice_min() and slice_max() select rows with highest or lowest values of a variable. Note that we first must choose only the values which are not NA.
starwars %>%
  filter(!is.na(height)) %>%
  slice_max(height, n = 3)
#> # A tibble: 3 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Yarael P…    264    NA none       white      yellow            NA male  mascu…
#> 2 Tarfful      234   136 brown      brown      blue              NA male  mascu…
#> 3 Lama Su      229    88 none       grey       black             NA male  mascu…
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Select columns with select()

Often you work with large datasets with many columns but only a few are actually of interest to you. select() allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions:

# Select columns by name
starwars %>% select(hair_color, skin_color, eye_color)
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # ℹ 83 more rows
# Select all columns between hair_color and eye_color (inclusive)
starwars %>% select(hair_color:eye_color)
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # ℹ 83 more rows
# Select all columns except those from hair_color to eye_color (inclusive)
starwars %>% select(!(hair_color:eye_color))
#> # A tibble: 87 × 11
#>   name     height  mass birth_year sex   gender homeworld species films vehicles
#>   <chr>     <int> <dbl>      <dbl> <chr> <chr>  <chr>     <chr>   <lis> <list>  
#> 1 Luke Sk…    172    77       19   male  mascu… Tatooine  Human   <chr> <chr>   
#> 2 C-3PO       167    75      112   none  mascu… Tatooine  Droid   <chr> <chr>   
#> 3 R2-D2        96    32       33   none  mascu… Naboo     Droid   <chr> <chr>   
#> 4 Darth V…    202   136       41.9 male  mascu… Tatooine  Human   <chr> <chr>   
#> # ℹ 83 more rows
#> # ℹ 1 more variable: starships <list>
# Select all columns ending with color
starwars %>% select(ends_with("color"))
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # ℹ 83 more rows

There are a number of helper functions you can use within select(), like starts_with(), ends_with(), matches() and contains(). These let you quickly match larger blocks of variables that meet some criterion. See ?select for more details.

You can rename variables with select() by using named arguments:

starwars %>% select(home_world = homeworld)
#> # A tibble: 87 × 1
#>   home_world
#>   <chr>     
#> 1 Tatooine  
#> 2 Tatooine  
#> 3 Naboo     
#> 4 Tatooine  
#> # ℹ 83 more rows

But because select() drops all the variables not explicitly mentioned, it’s not that useful. Instead, use rename():

starwars %>% rename(home_world = homeworld)
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: home_world <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Add new columns with mutate()

Besides selecting sets of existing columns, it’s often useful to add new columns that are functions of existing columns. This is the job of mutate():

starwars %>% mutate(height_m = height / 100)
#> # A tibble: 87 × 15
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>, height_m <dbl>

We can’t see the height in meters we just calculated, but we can fix that using a select command.

starwars %>%
  mutate(height_m = height / 100) %>%
  select(height_m, height, everything())
#> # A tibble: 87 × 15
#>   height_m height name     mass hair_color skin_color eye_color birth_year sex  
#>      <dbl>  <int> <chr>   <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1     1.72    172 Luke S…    77 blond      fair       blue            19   male 
#> 2     1.67    167 C-3PO      75 <NA>       gold       yellow         112   none 
#> 3     0.96     96 R2-D2      32 <NA>       white, bl… red             33   none 
#> 4     2.02    202 Darth …   136 none       white      yellow          41.9 male 
#> # ℹ 83 more rows
#> # ℹ 6 more variables: gender <chr>, homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>

dplyr::mutate() is similar to the base transform(), but allows you to refer to columns that you’ve just created:

starwars %>%
  mutate(
    height_m = height / 100,
    BMI = mass / (height_m^2)
  ) %>%
  select(BMI, everything())
#> # A tibble: 87 × 16
#>     BMI name       height  mass hair_color skin_color eye_color birth_year sex  
#>   <dbl> <chr>       <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1  26.0 Luke Skyw…    172    77 blond      fair       blue            19   male 
#> 2  26.9 C-3PO         167    75 <NA>       gold       yellow         112   none 
#> 3  34.7 R2-D2          96    32 <NA>       white, bl… red             33   none 
#> 4  33.3 Darth Vad…    202   136 none       white      yellow          41.9 male 
#> # ℹ 83 more rows
#> # ℹ 7 more variables: gender <chr>, homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>, height_m <dbl>

If you only want to keep the new variables, use .keep = "none":

starwars %>%
  mutate(
    height_m = height / 100,
    BMI = mass / (height_m^2),
    .keep = "none"
  )
#> # A tibble: 87 × 2
#>   height_m   BMI
#>      <dbl> <dbl>
#> 1     1.72  26.0
#> 2     1.67  26.9
#> 3     0.96  34.7
#> 4     2.02  33.3
#> # ℹ 83 more rows

Change column order with relocate()

Use a similar syntax as select() to move blocks of columns at once

starwars %>% relocate(sex:homeworld, .before = height)
#> # A tibble: 87 × 14
#>   name       sex   gender homeworld height  mass hair_color skin_color eye_color
#>   <chr>      <chr> <chr>  <chr>      <int> <dbl> <chr>      <chr>      <chr>    
#> 1 Luke Skyw… male  mascu… Tatooine     172    77 blond      fair       blue     
#> 2 C-3PO      none  mascu… Tatooine     167    75 <NA>       gold       yellow   
#> 3 R2-D2      none  mascu… Naboo         96    32 <NA>       white, bl… red      
#> 4 Darth Vad… male  mascu… Tatooine     202   136 none       white      yellow   
#> # ℹ 83 more rows
#> # ℹ 5 more variables: birth_year <dbl>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Summarise values with summarise()

The last verb is summarise(). It collapses a data frame to a single row.

starwars %>% summarise(height = mean(height, na.rm = TRUE))
#> # A tibble: 1 × 1
#>   height
#>    <dbl>
#> 1   175.

It’s not that useful until we learn the group_by() verb below.

Commonalities

You may have noticed that the syntax and function of all these verbs are very similar:

  • The first argument is a data frame.

  • The subsequent arguments describe what to do with the data frame. You can refer to columns in the data frame directly without using $.

  • The result is a new data frame

Together these properties make it easy to chain together multiple simple steps to achieve a complex result.

These five functions provide the basis of a language of data manipulation. At the most basic level, you can only alter a tidy data frame in five useful ways: you can reorder the rows (arrange()), pick observations and variables of interest (filter() and select()), add new variables that are functions of existing variables (mutate()), or collapse many values to a summary (summarise()).

Combining functions with %>%

The dplyr API is functional in the sense that function calls don’t have side-effects. You must always save their results. This doesn’t lead to particularly elegant code, especially if you want to do many operations at once. You either have to do it step-by-step:

a1 <- group_by(starwars, species, sex)
a2 <- select(a1, height, mass)
a3 <- summarise(a2,
  height = mean(height, na.rm = TRUE),
  mass = mean(mass, na.rm = TRUE)
)

Or if you don’t want to name the intermediate results, you need to wrap the function calls inside each other:

summarise(
  select(
    group_by(starwars, species, sex),
    height, mass
  ),
  height = mean(height, na.rm = TRUE),
  mass = mean(mass, na.rm = TRUE)
)
#> Adding missing grouping variables: `species`, `sex`
#> `summarise()` has grouped output by 'species'. You can override using the
#> `.groups` argument.
#> # A tibble: 41 × 4
#> # Groups:   species [38]
#>   species  sex   height  mass
#>   <chr>    <chr>  <dbl> <dbl>
#> 1 Aleena   male      79    15
#> 2 Besalisk male     198   102
#> 3 Cerean   male     198    82
#> 4 Chagrian male     196   NaN
#> # ℹ 37 more rows

This is difficult to read because the order of the operations is from inside to out. Thus, the arguments are a long way away from the function. To get around this problem, dplyr provides the %>% operator from magrittr. x %>% f(y) turns into f(x, y) so you can use it to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as “then”):

starwars %>%
  group_by(species, sex) %>%
  select(height, mass) %>%
  summarise(
    height = mean(height, na.rm = TRUE),
    mass = mean(mass, na.rm = TRUE)
  )

Patterns of operations

The dplyr verbs can be classified by the type of operations they accomplish (we sometimes speak of their semantics, i.e., their meaning). It’s helpful to have a good grasp of the difference between select and mutate operations.

Selecting operations

One of the appealing features of dplyr is that you can refer to columns from the tibble as if they were regular variables. However, the syntactic uniformity of referring to bare column names hides semantical differences across the verbs. A column symbol supplied to select() does not have the same meaning as the same symbol supplied to mutate().

Selecting operations expect column names and positions. Hence, when you call select() with bare variable names, they actually represent their own positions in the tibble. The following calls are completely equivalent from dplyr’s point of view:

# `name` represents the integer 1
select(starwars, name)
#> # A tibble: 87 × 1
#>   name          
#>   <chr>         
#> 1 Luke Skywalker
#> 2 C-3PO         
#> 3 R2-D2         
#> 4 Darth Vader   
#> # ℹ 83 more rows
select(starwars, 1)
#> # A tibble: 87 × 1
#>   name          
#>   <chr>         
#> 1 Luke Skywalker
#> 2 C-3PO         
#> 3 R2-D2         
#> 4 Darth Vader   
#> # ℹ 83 more rows

By the same token, this means that you cannot refer to variables from the surrounding context if they have the same name as one of the columns. In the following example, height still represents 2, not 5:

height <- 5
select(starwars, height)
#> # A tibble: 87 × 1
#>   height
#>    <int>
#> 1    172
#> 2    167
#> 3     96
#> 4    202
#> # ℹ 83 more rows

One useful subtlety is that this only applies to bare names and to selecting calls like c(height, mass) or height:mass. In all other cases, the columns of the data frame are not put in scope. This allows you to refer to contextual variables in selection helpers:

name <- "color"
select(starwars, ends_with(name))
#> # A tibble: 87 × 3
#>   hair_color skin_color  eye_color
#>   <chr>      <chr>       <chr>    
#> 1 blond      fair        blue     
#> 2 <NA>       gold        yellow   
#> 3 <NA>       white, blue red      
#> 4 none       white       yellow   
#> # ℹ 83 more rows

These semantics are usually intuitive. But note the subtle difference:

name <- 5
select(starwars, name, identity(name))
#> # A tibble: 87 × 2
#>   name           skin_color 
#>   <chr>          <chr>      
#> 1 Luke Skywalker fair       
#> 2 C-3PO          gold       
#> 3 R2-D2          white, blue
#> 4 Darth Vader    white      
#> # ℹ 83 more rows

In the first argument, name represents its own position 1. In the second argument, name is evaluated in the surrounding context and represents the fifth column.

For a long time, select() used to only understand column positions. Counting from dplyr 0.6, it now understands column names as well. This makes it a bit easier to program with select():

vars <- c("name", "height")
select(starwars, all_of(vars), "mass")
#> # A tibble: 87 × 3
#>   name           height  mass
#>   <chr>           <int> <dbl>
#> 1 Luke Skywalker    172    77
#> 2 C-3PO             167    75
#> 3 R2-D2              96    32
#> 4 Darth Vader       202   136
#> # ℹ 83 more rows

Mutating operations

Mutate semantics are quite different from selection semantics. Whereas select() expects column names or positions, mutate() expects column vectors. We will set up a smaller tibble to use for our examples.

df <- starwars %>% select(name, height, mass)

When we use select(), the bare column names stand for their own positions in the tibble. For mutate() on the other hand, column symbols represent the actual column vectors stored in the tibble. Consider what happens if we give a string or a number to mutate():

mutate(df, "height", 2)
#> # A tibble: 87 × 5
#>   name           height  mass `"height"`   `2`
#>   <chr>           <int> <dbl> <chr>      <dbl>
#> 1 Luke Skywalker    172    77 height         2
#> 2 C-3PO             167    75 height         2
#> 3 R2-D2              96    32 height         2
#> 4 Darth Vader       202   136 height         2
#> # ℹ 83 more rows

mutate() gets length-1 vectors that it interprets as new columns in the data frame. These vectors are recycled so they match the number of rows. That’s why it doesn’t make sense to supply expressions like "height" + 10 to mutate(). This amounts to adding 10 to a string! The correct expression is:

mutate(df, height + 10)
#> # A tibble: 87 × 4
#>   name           height  mass `height + 10`
#>   <chr>           <int> <dbl>         <dbl>
#> 1 Luke Skywalker    172    77           182
#> 2 C-3PO             167    75           177
#> 3 R2-D2              96    32           106
#> 4 Darth Vader       202   136           212
#> # ℹ 83 more rows

In the same way, you can unquote values from the context if these values represent a valid column. They must be either length 1 (they then get recycled) or have the same length as the number of rows. In the following example we create a new vector that we add to the data frame:

var <- seq(1, nrow(df))
mutate(df, new = var)
#> # A tibble: 87 × 4
#>   name           height  mass   new
#>   <chr>           <int> <dbl> <int>
#> 1 Luke Skywalker    172    77     1
#> 2 C-3PO             167    75     2
#> 3 R2-D2              96    32     3
#> 4 Darth Vader       202   136     4
#> # ℹ 83 more rows

A case in point is group_by(). While you might think it has select semantics, it actually has mutate semantics. This is quite handy as it allows you to group by a modified column:

group_by(starwars, sex)
#> # A tibble: 87 × 14
#> # Groups:   sex [5]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
group_by(starwars, sex = as.factor(sex))
#> # A tibble: 87 × 14
#> # Groups:   sex [5]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <fct> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
group_by(starwars, height_binned = cut(height, 3))
#> # A tibble: 87 × 15
#> # Groups:   height_binned [4]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 6 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>, height_binned <fct>

This is why you can’t supply a column name as a string to group_by(). This amounts to creating a new column containing the string recycled to the number of rows:

group_by(df, "month")
#> # A tibble: 87 × 4
#> # Groups:   "month" [1]
#>   name           height  mass `"month"`
#>   <chr>           <int> <dbl> <chr>    
#> 1 Luke Skywalker    172    77 month    
#> 2 C-3PO             167    75 month    
#> 3 R2-D2              96    32 month    
#> 4 Darth Vader       202   136 month    
#> # ℹ 83 more rows
dplyr/inst/doc/grouping.html0000644000176200001440000016364614525507103015670 0ustar liggesusers Grouped data

Grouped data

dplyr verbs are particularly powerful when you apply them to grouped data frames (grouped_df objects). This vignette shows you:

  • How to group, inspect, and ungroup with group_by() and friends.

  • How individual dplyr verbs change their behaviour when applied to a grouped data frame.

  • How to access data about the “current” group from within a verb.

We’ll start by loading dplyr:

library(dplyr)

group_by()

The most important grouping verb is group_by(): it takes a data frame and one or more variables to group by:

by_species <- starwars %>% group_by(species)
by_sex_gender <- starwars %>% group_by(sex, gender)

You can see the grouping when you print the data:

by_species
#> # A tibble: 87 × 14
#> # Groups:   species [38]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
by_sex_gender
#> # A tibble: 87 × 14
#> # Groups:   sex, gender [6]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Or use tally() to count the number of rows in each group. The sort argument is useful if you want to see the largest groups up front.

by_species %>% tally()
#> # A tibble: 38 × 2
#>   species      n
#>   <chr>    <int>
#> 1 Aleena       1
#> 2 Besalisk     1
#> 3 Cerean       1
#> 4 Chagrian     1
#> # ℹ 34 more rows

by_sex_gender %>% tally(sort = TRUE)
#> # A tibble: 6 × 3
#> # Groups:   sex [5]
#>   sex    gender        n
#>   <chr>  <chr>     <int>
#> 1 male   masculine    60
#> 2 female feminine     16
#> 3 none   masculine     5
#> 4 <NA>   <NA>          4
#> # ℹ 2 more rows

As well as grouping by existing variables, you can group by any function of existing variables. This is equivalent to performing a mutate() before the group_by():

bmi_breaks <- c(0, 18.5, 25, 30, Inf)

starwars %>%
  group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>%
  tally()
#> # A tibble: 5 × 2
#>   bmi_cat       n
#>   <fct>     <int>
#> 1 (0,18.5]     10
#> 2 (18.5,25]    24
#> 3 (25,30]      13
#> 4 (30,Inf]     12
#> # ℹ 1 more row

Group metadata

You can see underlying group data with group_keys(). It has one row for each group and one column for each grouping variable:

by_species %>% group_keys()
#> # A tibble: 38 × 1
#>   species 
#>   <chr>   
#> 1 Aleena  
#> 2 Besalisk
#> 3 Cerean  
#> 4 Chagrian
#> # ℹ 34 more rows

by_sex_gender %>% group_keys()
#> # A tibble: 6 × 2
#>   sex            gender   
#>   <chr>          <chr>    
#> 1 female         feminine 
#> 2 hermaphroditic masculine
#> 3 male           masculine
#> 4 none           feminine 
#> # ℹ 2 more rows

You can see which group each row belongs to with group_indices():

by_species %>% group_indices()
#>  [1] 11  6  6 11 11 11 11  6 11 11 11 11 34 11 24 12 11 38 36 11 11  6 31 11 11
#> [26] 18 11 11  8 26 11 21 11 11 10 10 10 11 30  7 11 11 37 32 32  1 33 35 29 11
#> [51]  3 20 37 27 13 23 16  4 38 38 11  9 17 17 11 11 11 11  5  2 15 15 11  6 25
#> [76] 19 28 14 34 11 38 22 11 11 11  6 11

And which rows each group contains with group_rows():

by_species %>% group_rows() %>% head()
#> <list_of<integer>[6]>
#> [[1]]
#> [1] 46
#> 
#> [[2]]
#> [1] 70
#> 
#> [[3]]
#> [1] 51
#> 
#> [[4]]
#> [1] 58
#> 
#> [[5]]
#> [1] 69
#> 
#> [[6]]
#> [1]  2  3  8 22 74 86

Use group_vars() if you just want the names of the grouping variables:

by_species %>% group_vars()
#> [1] "species"
by_sex_gender %>% group_vars()
#> [1] "sex"    "gender"

Changing and adding to grouping variables

If you apply group_by() to an already grouped dataset, it will overwrite the existing grouping variables. For example, the following code groups by homeworld instead of species:

by_species %>%
  group_by(homeworld) %>%
  tally()
#> # A tibble: 49 × 2
#>   homeworld       n
#>   <chr>       <int>
#> 1 Alderaan        3
#> 2 Aleen Minor     1
#> 3 Bespin          1
#> 4 Bestine IV      1
#> # ℹ 45 more rows

To augment the grouping, use .add = TRUE1. For example, the following code groups by species and homeworld:

by_species %>%
  group_by(homeworld, .add = TRUE) %>%
  tally()
#> # A tibble: 57 × 3
#> # Groups:   species [38]
#>   species  homeworld       n
#>   <chr>    <chr>       <int>
#> 1 Aleena   Aleen Minor     1
#> 2 Besalisk Ojom            1
#> 3 Cerean   Cerea           1
#> 4 Chagrian Champala        1
#> # ℹ 53 more rows

Removing grouping variables

To remove all grouping variables, use ungroup():

by_species %>%
  ungroup() %>%
  tally()
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1    87

You can also choose to selectively ungroup by listing the variables you want to remove:

by_sex_gender %>% 
  ungroup(sex) %>% 
  tally()
#> # A tibble: 3 × 2
#>   gender        n
#>   <chr>     <int>
#> 1 feminine     17
#> 2 masculine    66
#> 3 <NA>          4

Verbs

The following sections describe how grouping affects the main dplyr verbs.

summarise()

summarise() computes a summary for each group. This means that it starts from group_keys(), adding summary variables to the right hand side:

by_species %>%
  summarise(
    n = n(),
    height = mean(height, na.rm = TRUE)
  )
#> # A tibble: 38 × 3
#>   species      n height
#>   <chr>    <int>  <dbl>
#> 1 Aleena       1     79
#> 2 Besalisk     1    198
#> 3 Cerean       1    198
#> 4 Chagrian     1    196
#> # ℹ 34 more rows

The .groups= argument controls the grouping structure of the output. The historical behaviour of removing the right hand side grouping variable corresponds to .groups = "drop_last" without a message or .groups = NULL with a message (the default).

by_sex_gender %>% 
  summarise(n = n()) %>% 
  group_vars()
#> `summarise()` has grouped output by 'sex'. You can override using the `.groups`
#> argument.
#> [1] "sex"

by_sex_gender %>% 
  summarise(n = n(), .groups = "drop_last") %>% 
  group_vars()
#> [1] "sex"

Since version 1.0.0 the groups may also be kept (.groups = "keep") or dropped (.groups = "drop").

by_sex_gender %>% 
  summarise(n = n(), .groups = "keep") %>% 
  group_vars()
#> [1] "sex"    "gender"

by_sex_gender %>% 
  summarise(n = n(), .groups = "drop") %>% 
  group_vars()
#> character(0)

When the output no longer has grouping variables, it becomes ungrouped (i.e. a regular tibble).

select(), rename(), and relocate()

rename() and relocate() behave identically with grouped and ungrouped data because they only affect the name or position of existing columns. Grouped select() is almost identical to ungrouped select, except that it always includes the grouping variables:

by_species %>% select(mass)
#> Adding missing grouping variables: `species`
#> # A tibble: 87 × 2
#> # Groups:   species [38]
#>   species  mass
#>   <chr>   <dbl>
#> 1 Human      77
#> 2 Droid      75
#> 3 Droid      32
#> 4 Human     136
#> # ℹ 83 more rows

If you don’t want the grouping variables, you’ll have to first ungroup(). (This design is possibly a mistake, but we’re stuck with it for now.)

arrange()

Grouped arrange() is the same as ungrouped arrange(), unless you set .by_group = TRUE, in which case it will order first by the grouping variables.

by_species %>%
  arrange(desc(mass)) %>%
  relocate(species, mass)
#> # A tibble: 87 × 14
#> # Groups:   species [38]
#>   species  mass name     height hair_color skin_color eye_color birth_year sex  
#>   <chr>   <dbl> <chr>     <int> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1 Hutt     1358 Jabba D…    175 <NA>       green-tan… orange         600   herm…
#> 2 Kaleesh   159 Grievous    216 none       brown, wh… green, y…       NA   male 
#> 3 Droid     140 IG-88       200 none       metal      red             15   none 
#> 4 Human     136 Darth V…    202 none       white      yellow          41.9 male 
#> # ℹ 83 more rows
#> # ℹ 5 more variables: gender <chr>, homeworld <chr>, films <list>,
#> #   vehicles <list>, starships <list>

by_species %>%
  arrange(desc(mass), .by_group = TRUE) %>%
  relocate(species, mass)
#> # A tibble: 87 × 14
#> # Groups:   species [38]
#>   species   mass name    height hair_color skin_color eye_color birth_year sex  
#>   <chr>    <dbl> <chr>    <int> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1 Aleena      15 Ratts …     79 none       grey, blue unknown           NA male 
#> 2 Besalisk   102 Dexter…    198 none       brown      yellow            NA male 
#> 3 Cerean      82 Ki-Adi…    198 white      pale       yellow            92 male 
#> 4 Chagrian    NA Mas Am…    196 none       blue       blue              NA male 
#> # ℹ 83 more rows
#> # ℹ 5 more variables: gender <chr>, homeworld <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Note that the second example is sorted by species (from the group_by() statement) and then by mass (within species).

mutate()

In simple cases with vectorised functions, grouped and ungrouped mutate() give the same results. They differ when used with summary functions:

# Subtract off global mean
starwars %>% 
  select(name, homeworld, mass) %>% 
  mutate(standard_mass = mass - mean(mass, na.rm = TRUE))
#> # A tibble: 87 × 4
#>   name           homeworld  mass standard_mass
#>   <chr>          <chr>     <dbl>         <dbl>
#> 1 Luke Skywalker Tatooine     77         -20.3
#> 2 C-3PO          Tatooine     75         -22.3
#> 3 R2-D2          Naboo        32         -65.3
#> 4 Darth Vader    Tatooine    136          38.7
#> # ℹ 83 more rows

# Subtract off homeworld mean
starwars %>% 
  select(name, homeworld, mass) %>% 
  group_by(homeworld) %>% 
  mutate(standard_mass = mass - mean(mass, na.rm = TRUE))
#> # A tibble: 87 × 4
#> # Groups:   homeworld [49]
#>   name           homeworld  mass standard_mass
#>   <chr>          <chr>     <dbl>         <dbl>
#> 1 Luke Skywalker Tatooine     77         -8.38
#> 2 C-3PO          Tatooine     75        -10.4 
#> 3 R2-D2          Naboo        32        -32.2 
#> 4 Darth Vader    Tatooine    136         50.6 
#> # ℹ 83 more rows

Or with window functions like min_rank():

# Overall rank
starwars %>% 
  select(name, homeworld, height) %>% 
  mutate(rank = min_rank(height))
#> # A tibble: 87 × 4
#>   name           homeworld height  rank
#>   <chr>          <chr>      <int> <int>
#> 1 Luke Skywalker Tatooine     172    28
#> 2 C-3PO          Tatooine     167    20
#> 3 R2-D2          Naboo         96     5
#> 4 Darth Vader    Tatooine     202    72
#> # ℹ 83 more rows

# Rank per homeworld
starwars %>% 
  select(name, homeworld, height) %>% 
  group_by(homeworld) %>% 
  mutate(rank = min_rank(height))
#> # A tibble: 87 × 4
#> # Groups:   homeworld [49]
#>   name           homeworld height  rank
#>   <chr>          <chr>      <int> <int>
#> 1 Luke Skywalker Tatooine     172     5
#> 2 C-3PO          Tatooine     167     4
#> 3 R2-D2          Naboo         96     1
#> 4 Darth Vader    Tatooine     202    10
#> # ℹ 83 more rows

filter()

A grouped filter() effectively does a mutate() to generate a logical variable, and then only keeps the rows where the variable is TRUE. This means that grouped filters can be used with summary functions. For example, we can find the tallest character of each species:

by_species %>%
  select(name, species, height) %>% 
  filter(height == max(height))
#> # A tibble: 36 × 3
#> # Groups:   species [36]
#>   name                  species        height
#>   <chr>                 <chr>           <int>
#> 1 Greedo                Rodian            173
#> 2 Jabba Desilijic Tiure Hutt              175
#> 3 Yoda                  Yoda's species     66
#> 4 Bossk                 Trandoshan        190
#> # ℹ 32 more rows

You can also use filter() to remove entire groups. For example, the following code eliminates all groups that only have a single member:

by_species %>%
  filter(n() != 1) %>% 
  tally()
#> # A tibble: 9 × 2
#>   species      n
#>   <chr>    <int>
#> 1 Droid        6
#> 2 Gungan       3
#> 3 Human       35
#> 4 Kaminoan     2
#> # ℹ 5 more rows

slice() and friends

slice() and friends (slice_head(), slice_tail(), slice_sample(), slice_min() and slice_max()) select rows within a group. For example, we can select the first observation within each species:

by_species %>%
  relocate(species) %>% 
  slice(1)
#> # A tibble: 38 × 14
#> # Groups:   species [38]
#>   species  name    height  mass hair_color skin_color eye_color birth_year sex  
#>   <chr>    <chr>    <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr>
#> 1 Aleena   Ratts …     79    15 none       grey, blue unknown           NA male 
#> 2 Besalisk Dexter…    198   102 none       brown      yellow            NA male 
#> 3 Cerean   Ki-Adi…    198    82 white      pale       yellow            92 male 
#> 4 Chagrian Mas Am…    196    NA none       blue       blue              NA male 
#> # ℹ 34 more rows
#> # ℹ 5 more variables: gender <chr>, homeworld <chr>, films <list>,
#> #   vehicles <list>, starships <list>

Similarly, we can use slice_min() to select the smallest n values of a variable:

by_species %>%
  filter(!is.na(height)) %>% 
  slice_min(height, n = 2)
#> # A tibble: 47 × 14
#> # Groups:   species [38]
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Ratts Ty…     79    15 none       grey, blue unknown           NA male  mascu…
#> 2 Dexter J…    198   102 none       brown      yellow            NA male  mascu…
#> 3 Ki-Adi-M…    198    82 white      pale       yellow            92 male  mascu…
#> 4 Mas Amed…    196    NA none       blue       blue              NA male  mascu…
#> # ℹ 43 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

  1. Note that the argument changed from add = TRUE to .add = TRUE in dplyr 1.0.0.↩︎

dplyr/inst/doc/base.html0000644000176200001440000025565114525507100014743 0ustar liggesusers dplyr <-> base R

dplyr <-> base R

This vignette compares dplyr functions to their base R equivalents. This helps those familiar with base R understand better what dplyr does, and shows dplyr users how you might express the same ideas in base R code. We’ll start with a rough overview of the major differences, then discuss the one table verbs in more detail, followed by the two table verbs.

Overview

  1. dplyr verbs input and output data frames. This contrasts with base R functions which more frequently work with individual vectors.

  2. dplyr relies heavily on “non-standard evaluation” so that you don’t need to use $ to refer to columns in the “current” data frame. This behaviour is inspired by the base functions subset() and transform().

  3. dplyr solutions tend to use a variety of single purpose verbs, while base R solutions typically tend to use [ in a variety of ways, depending on the task at hand.

  4. Multiple dplyr verbs are often strung together into a pipeline by %>%. In base R, you’ll typically save intermediate results to a variable that you either discard, or repeatedly overwrite.

  5. All dplyr verbs handle “grouped” data frames so that the code to perform a computation per-group looks very similar to code that works on a whole data frame. In base R, per-group operations tend to have varied forms.

One table verbs

The following table shows a condensed translation between dplyr verbs and their base R equivalents. The following sections describe each operation in more detail. You’ll learn more about the dplyr verbs in their documentation and in vignette("dplyr").

dplyr base
arrange(df, x) df[order(x), , drop = FALSE]
distinct(df, x) df[!duplicated(x), , drop = FALSE], unique()
filter(df, x) df[which(x), , drop = FALSE], subset()
mutate(df, z = x + y) df$z <- df$x + df$y, transform()
pull(df, 1) df[[1]]
pull(df, x) df$x
rename(df, y = x) names(df)[names(df) == "x"] <- "y"
relocate(df, y) df[union("y", names(df))]
select(df, x, y) df[c("x", "y")], subset()
select(df, starts_with("x")) df[grepl("^x", names(df))]
summarise(df, mean(x)) mean(df$x), tapply(), aggregate(), by()
slice(df, c(1, 2, 5)) df[c(1, 2, 5), , drop = FALSE]

To begin, we’ll load dplyr and convert mtcars and iris to tibbles so that we can easily show only abbreviated output for each operation.

library(dplyr)
mtcars <- as_tibble(mtcars)
iris <- as_tibble(iris)

arrange(): Arrange rows by variables

dplyr::arrange() orders the rows of a data frame by the values of one or more columns:

mtcars %>% arrange(cyl, disp)
#> # A tibble: 32 × 11
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
#> 2  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
#> 3  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
#> 4  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
#> # ℹ 28 more rows

The desc() helper allows you to order selected variables in descending order:

mtcars %>% arrange(desc(cyl), desc(disp))
#> # A tibble: 32 × 11
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  10.4     8   472   205  2.93  5.25  18.0     0     0     3     4
#> 2  10.4     8   460   215  3     5.42  17.8     0     0     3     4
#> 3  14.7     8   440   230  3.23  5.34  17.4     0     0     3     4
#> 4  19.2     8   400   175  3.08  3.84  17.0     0     0     3     2
#> # ℹ 28 more rows

We can replicate in base R by using [ with order():

mtcars[order(mtcars$cyl, mtcars$disp), , drop = FALSE]
#> # A tibble: 32 × 11
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4     1
#> 2  30.4     4  75.7    52  4.93  1.62  18.5     1     1     4     2
#> 3  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4     1
#> 4  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1
#> # ℹ 28 more rows

Note the use of drop = FALSE. If you forget this, and the input is a data frame with a single column, the output will be a vector, not a data frame. This is a source of subtle bugs.

Base R does not provide a convenient and general way to sort individual variables in descending order, so you have two options:

  • For numeric variables, you can use -x.
  • You can request order() to sort all variables in descending order.
mtcars[order(mtcars$cyl, mtcars$disp, decreasing = TRUE), , drop = FALSE]
mtcars[order(-mtcars$cyl, -mtcars$disp), , drop = FALSE]

distinct(): Select distinct/unique rows

dplyr::distinct() selects unique rows:

df <- tibble(
  x = sample(10, 100, rep = TRUE),
  y = sample(10, 100, rep = TRUE)
)

df %>% distinct(x) # selected columns
#> # A tibble: 10 × 1
#>       x
#>   <int>
#> 1     3
#> 2     5
#> 3     4
#> 4     7
#> # ℹ 6 more rows
df %>% distinct(x, .keep_all = TRUE) # whole data frame
#> # A tibble: 10 × 2
#>       x     y
#>   <int> <int>
#> 1     3     6
#> 2     5     2
#> 3     4     1
#> 4     7     1
#> # ℹ 6 more rows

There are two equivalents in base R, depending on whether you want the whole data frame, or just selected variables:

unique(df["x"]) # selected columns
#> # A tibble: 10 × 1
#>       x
#>   <int>
#> 1     3
#> 2     5
#> 3     4
#> 4     7
#> # ℹ 6 more rows
df[!duplicated(df$x), , drop = FALSE] # whole data frame
#> # A tibble: 10 × 2
#>       x     y
#>   <int> <int>
#> 1     3     6
#> 2     5     2
#> 3     4     1
#> 4     7     1
#> # ℹ 6 more rows

filter(): Return rows with matching conditions

dplyr::filter() selects rows where an expression is TRUE:

starwars %>% filter(species == "Human")
#> # A tibble: 35 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # ℹ 31 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
starwars %>% filter(mass > 1000)
#> # A tibble: 1 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Jabba De…    175  1358 <NA>       green-tan… orange           600 herm… mascu…
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
starwars %>% filter(hair_color == "none" & eye_color == "black")
#> # A tibble: 9 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Nien Nunb    160    68 none       grey       black             NA male  mascu…
#> 2 Gasgano      122    NA none       white, bl… black             NA male  mascu…
#> 3 Kit Fisto    196    87 none       green      black             NA male  mascu…
#> 4 Plo Koon     188    80 none       orange     black             22 male  mascu…
#> # ℹ 5 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

The closest base equivalent (and the inspiration for filter()) is subset():

subset(starwars, species == "Human")
#> # A tibble: 35 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # ℹ 31 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
subset(starwars, mass > 1000)
#> # A tibble: 1 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Jabba De…    175  1358 <NA>       green-tan… orange           600 herm… mascu…
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
subset(starwars, hair_color == "none" & eye_color == "black")
#> # A tibble: 9 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Nien Nunb    160    68 none       grey       black             NA male  mascu…
#> 2 Gasgano      122    NA none       white, bl… black             NA male  mascu…
#> 3 Kit Fisto    196    87 none       green      black             NA male  mascu…
#> 4 Plo Koon     188    80 none       orange     black             22 male  mascu…
#> # ℹ 5 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

You can also use [ but this also requires the use of which() to remove NAs:

starwars[which(starwars$species == "Human"), , drop = FALSE]
#> # A tibble: 35 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # ℹ 31 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
starwars[which(starwars$mass > 1000), , drop = FALSE]
#> # A tibble: 1 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Jabba De…    175  1358 <NA>       green-tan… orange           600 herm… mascu…
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
starwars[which(starwars$hair_color == "none" & starwars$eye_color == "black"), , drop = FALSE]
#> # A tibble: 9 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Nien Nunb    160    68 none       grey       black             NA male  mascu…
#> 2 Gasgano      122    NA none       white, bl… black             NA male  mascu…
#> 3 Kit Fisto    196    87 none       green      black             NA male  mascu…
#> 4 Plo Koon     188    80 none       orange     black             22 male  mascu…
#> # ℹ 5 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

mutate(): Create or transform variables

dplyr::mutate() creates new variables from existing variables:

df %>% mutate(z = x + y, z2 = z ^ 2)
#> # A tibble: 100 × 4
#>       x     y     z    z2
#>   <int> <int> <int> <dbl>
#> 1     3     6     9    81
#> 2     5     2     7    49
#> 3     4     1     5    25
#> 4     7     1     8    64
#> # ℹ 96 more rows

The closest base equivalent is transform(), but note that it cannot use freshly created variables:

head(transform(df, z = x + y, z2 = (x + y) ^ 2))
#>    x y  z  z2
#> 1  3 6  9  81
#> 2  5 2  7  49
#> 3  4 1  5  25
#> 4  7 1  8  64
#> 5 10 7 17 289
#> 6  7 3 10 100

Alternatively, you can use $<-:

mtcars$cyl2 <- mtcars$cyl * 2
mtcars$cyl4 <- mtcars$cyl2 * 2

When applied to a grouped data frame, dplyr::mutate() computes new variables once per group:

gf <- tibble(g = c(1, 1, 2, 2), x = c(0.5, 1.5, 2.5, 3.5))
gf %>% 
  group_by(g) %>% 
  mutate(x_mean = mean(x), x_rank = rank(x))
#> # A tibble: 4 × 4
#> # Groups:   g [2]
#>       g     x x_mean x_rank
#>   <dbl> <dbl>  <dbl>  <dbl>
#> 1     1   0.5      1      1
#> 2     1   1.5      1      2
#> 3     2   2.5      3      1
#> 4     2   3.5      3      2

To replicate this in base R, you can use ave():

transform(gf, 
  x_mean = ave(x, g, FUN = mean), 
  x_rank = ave(x, g, FUN = rank)
)
#>   g   x x_mean x_rank
#> 1 1 0.5      1      1
#> 2 1 1.5      1      2
#> 3 2 2.5      3      1
#> 4 2 3.5      3      2

pull(): Pull out a single variable

dplyr::pull() extracts a variable either by name or position:

mtcars %>% pull(1)
#>  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
#> [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
#> [31] 15.0 21.4
mtcars %>% pull(cyl)
#>  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4

This is equivalent to [[ for positions and $ for names:

mtcars[[1]]
#>  [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
#> [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
#> [31] 15.0 21.4
mtcars$cyl
#>  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4

relocate(): Change column order

dplyr::relocate() makes it easy to move a set of columns to a new position (by default, the front):

# to front
mtcars %>% relocate(gear, carb) 
#> # A tibble: 32 × 13
#>    gear  carb   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1     4     4  21       6   160   110  3.9   2.62  16.5     0     1    12    24
#> 2     4     4  21       6   160   110  3.9   2.88  17.0     0     1    12    24
#> 3     4     1  22.8     4   108    93  3.85  2.32  18.6     1     1     8    16
#> 4     3     1  21.4     6   258   110  3.08  3.22  19.4     1     0    12    24
#> # ℹ 28 more rows

# to back
mtcars %>% relocate(mpg, cyl, .after = last_col()) 
#> # A tibble: 32 × 13
#>    disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4   mpg   cyl
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1   160   110  3.9   2.62  16.5     0     1     4     4    12    24  21       6
#> 2   160   110  3.9   2.88  17.0     0     1     4     4    12    24  21       6
#> 3   108    93  3.85  2.32  18.6     1     1     4     1     8    16  22.8     4
#> 4   258   110  3.08  3.22  19.4     1     0     3     1    12    24  21.4     6
#> # ℹ 28 more rows

We can replicate this in base R with a little set manipulation:

mtcars[union(c("gear", "carb"), names(mtcars))]
#> # A tibble: 32 × 13
#>    gear  carb   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1     4     4  21       6   160   110  3.9   2.62  16.5     0     1    12    24
#> 2     4     4  21       6   160   110  3.9   2.88  17.0     0     1    12    24
#> 3     4     1  22.8     4   108    93  3.85  2.32  18.6     1     1     8    16
#> 4     3     1  21.4     6   258   110  3.08  3.22  19.4     1     0    12    24
#> # ℹ 28 more rows

to_back <- c("mpg", "cyl")
mtcars[c(setdiff(names(mtcars), to_back), to_back)]
#> # A tibble: 32 × 13
#>    disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4   mpg   cyl
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1   160   110  3.9   2.62  16.5     0     1     4     4    12    24  21       6
#> 2   160   110  3.9   2.88  17.0     0     1     4     4    12    24  21       6
#> 3   108    93  3.85  2.32  18.6     1     1     4     1     8    16  22.8     4
#> 4   258   110  3.08  3.22  19.4     1     0     3     1    12    24  21.4     6
#> # ℹ 28 more rows

Moving columns to somewhere in the middle requires a little more set twiddling.

rename(): Rename variables by name

dplyr::rename() allows you to rename variables by name or position:

iris %>% rename(sepal_length = Sepal.Length, sepal_width = 2)
#> # A tibble: 150 × 5
#>   sepal_length sepal_width Petal.Length Petal.Width Species
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa 
#> 4          4.6         3.1          1.5         0.2 setosa 
#> # ℹ 146 more rows

Renaming variables by position is straightforward in base R:

iris2 <- iris
names(iris2)[2] <- "sepal_width"

Renaming variables by name requires a bit more work:

names(iris2)[names(iris2) == "Sepal.Length"] <- "sepal_length"

rename_with(): Rename variables with a function

dplyr::rename_with() transforms column names with a function:

iris %>% rename_with(toupper)
#> # A tibble: 150 × 5
#>   SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa 
#> 4          4.6         3.1          1.5         0.2 setosa 
#> # ℹ 146 more rows

A similar effect can be achieved with setNames() in base R:

setNames(iris, toupper(names(iris)))
#> # A tibble: 150 × 5
#>   SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa 
#> 4          4.6         3.1          1.5         0.2 setosa 
#> # ℹ 146 more rows

select(): Select variables by name

dplyr::select() subsets columns by position, name, function of name, or other property:

iris %>% select(1:3)
#> # A tibble: 150 × 3
#>   Sepal.Length Sepal.Width Petal.Length
#>          <dbl>       <dbl>        <dbl>
#> 1          5.1         3.5          1.4
#> 2          4.9         3            1.4
#> 3          4.7         3.2          1.3
#> 4          4.6         3.1          1.5
#> # ℹ 146 more rows
iris %>% select(Species, Sepal.Length)
#> # A tibble: 150 × 2
#>   Species Sepal.Length
#>   <fct>          <dbl>
#> 1 setosa           5.1
#> 2 setosa           4.9
#> 3 setosa           4.7
#> 4 setosa           4.6
#> # ℹ 146 more rows
iris %>% select(starts_with("Petal"))
#> # A tibble: 150 × 2
#>   Petal.Length Petal.Width
#>          <dbl>       <dbl>
#> 1          1.4         0.2
#> 2          1.4         0.2
#> 3          1.3         0.2
#> 4          1.5         0.2
#> # ℹ 146 more rows
iris %>% select(where(is.factor))
#> # A tibble: 150 × 1
#>   Species
#>   <fct>  
#> 1 setosa 
#> 2 setosa 
#> 3 setosa 
#> 4 setosa 
#> # ℹ 146 more rows

Subsetting variables by position is straightforward in base R:

iris[1:3] # single argument selects columns; never drops
#> # A tibble: 150 × 3
#>   Sepal.Length Sepal.Width Petal.Length
#>          <dbl>       <dbl>        <dbl>
#> 1          5.1         3.5          1.4
#> 2          4.9         3            1.4
#> 3          4.7         3.2          1.3
#> 4          4.6         3.1          1.5
#> # ℹ 146 more rows
iris[1:3, , drop = FALSE]
#> # A tibble: 3 × 5
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#> 1          5.1         3.5          1.4         0.2 setosa 
#> 2          4.9         3            1.4         0.2 setosa 
#> 3          4.7         3.2          1.3         0.2 setosa

You have two options to subset by name:

iris[c("Species", "Sepal.Length")]
#> # A tibble: 150 × 2
#>   Species Sepal.Length
#>   <fct>          <dbl>
#> 1 setosa           5.1
#> 2 setosa           4.9
#> 3 setosa           4.7
#> 4 setosa           4.6
#> # ℹ 146 more rows
subset(iris, select = c(Species, Sepal.Length))
#> # A tibble: 150 × 2
#>   Species Sepal.Length
#>   <fct>          <dbl>
#> 1 setosa           5.1
#> 2 setosa           4.9
#> 3 setosa           4.7
#> 4 setosa           4.6
#> # ℹ 146 more rows

Subsetting by function of name requires a bit of work with grep():

iris[grep("^Petal", names(iris))]
#> # A tibble: 150 × 2
#>   Petal.Length Petal.Width
#>          <dbl>       <dbl>
#> 1          1.4         0.2
#> 2          1.4         0.2
#> 3          1.3         0.2
#> 4          1.5         0.2
#> # ℹ 146 more rows

And you can use Filter() to subset by type:

Filter(is.factor, iris)
#> # A tibble: 150 × 1
#>   Species
#>   <fct>  
#> 1 setosa 
#> 2 setosa 
#> 3 setosa 
#> 4 setosa 
#> # ℹ 146 more rows

summarise(): Reduce multiple values down to a single value

dplyr::summarise() computes one or more summaries for each group:

mtcars %>% 
  group_by(cyl) %>% 
  summarise(mean = mean(disp), n = n())
#> # A tibble: 3 × 3
#>     cyl  mean     n
#>   <dbl> <dbl> <int>
#> 1     4  105.    11
#> 2     6  183.     7
#> 3     8  353.    14

I think the closest base R equivalent uses by(). Unfortunately by() returns a list of data frames, but you can combine them back together again with do.call() and rbind():

mtcars_by <- by(mtcars, mtcars$cyl, function(df) {
  with(df, data.frame(cyl = cyl[[1]], mean = mean(disp), n = nrow(df)))
})
do.call(rbind, mtcars_by)
#>   cyl     mean  n
#> 4   4 105.1364 11
#> 6   6 183.3143  7
#> 8   8 353.1000 14

aggregate() comes very close to providing an elegant answer:

agg <- aggregate(disp ~ cyl, mtcars, function(x) c(mean = mean(x), n = length(x)))
agg
#>   cyl disp.mean   disp.n
#> 1   4  105.1364  11.0000
#> 2   6  183.3143   7.0000
#> 3   8  353.1000  14.0000

But unfortunately while it looks like there are disp.mean and disp.n columns, it’s actually a single matrix column:

str(agg)
#> 'data.frame':    3 obs. of  2 variables:
#>  $ cyl : num  4 6 8
#>  $ disp: num [1:3, 1:2] 105 183 353 11 7 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : NULL
#>   .. ..$ : chr [1:2] "mean" "n"

You can see a variety of other options at https://gist.github.com/hadley/c430501804349d382ce90754936ab8ec.

slice(): Choose rows by position

slice() selects rows with their location:

slice(mtcars, 25:n())
#> # A tibble: 8 × 13
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  19.2     8 400     175  3.08  3.84  17.0     0     0     3     2    16    32
#> 2  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1     8    16
#> 3  26       4 120.     91  4.43  2.14  16.7     0     1     5     2     8    16
#> 4  30.4     4  95.1   113  3.77  1.51  16.9     1     1     5     2     8    16
#> # ℹ 4 more rows

This is straightforward to replicate with [:

mtcars[25:nrow(mtcars), , drop = FALSE]
#> # A tibble: 8 × 13
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  cyl2  cyl4
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  19.2     8 400     175  3.08  3.84  17.0     0     0     3     2    16    32
#> 2  27.3     4  79      66  4.08  1.94  18.9     1     1     4     1     8    16
#> 3  26       4 120.     91  4.43  2.14  16.7     0     1     5     2     8    16
#> 4  30.4     4  95.1   113  3.77  1.51  16.9     1     1     5     2     8    16
#> # ℹ 4 more rows

Two-table verbs

When we want to merge two data frames, x and y, we have a variety of different ways to bring them together. Various base R merge() calls are replaced by a variety of dplyr join() functions.

dplyr base
inner_join(df1, df2) merge(df1, df2)
left_join(df1, df2) merge(df1, df2, all.x = TRUE)
right_join(df1, df2) merge(df1, df2, all.y = TRUE)
full_join(df1, df2) merge(df1, df2, all = TRUE)
semi_join(df1, df2) df1[df1$x %in% df2$x, , drop = FALSE]
anti_join(df1, df2) df1[!df1$x %in% df2$x, , drop = FALSE]

For more information about two-table verbs, see vignette("two-table").

Mutating joins

dplyr’s inner_join(), left_join(), right_join(), and full_join() add new columns from y to x, matching rows based on a set of “keys”, and differ only in how missing matches are handled. They are equivalent to calls to merge() with various settings of the all, all.x, and all.y arguments. The main difference is the order of the rows:

  • dplyr preserves the order of the x data frame.
  • merge() sorts the key columns.

Filtering joins

dplyr’s semi_join() and anti_join() affect only the rows, not the columns:

band_members %>% semi_join(band_instruments)
#> Joining with `by = join_by(name)`
#> # A tibble: 2 × 2
#>   name  band   
#>   <chr> <chr>  
#> 1 John  Beatles
#> 2 Paul  Beatles
band_members %>% anti_join(band_instruments)
#> Joining with `by = join_by(name)`
#> # A tibble: 1 × 2
#>   name  band  
#>   <chr> <chr> 
#> 1 Mick  Stones

They can be replicated in base R with [ and %in%:

band_members[band_members$name %in% band_instruments$name, , drop = FALSE]
#> # A tibble: 2 × 2
#>   name  band   
#>   <chr> <chr>  
#> 1 John  Beatles
#> 2 Paul  Beatles
band_members[!band_members$name %in% band_instruments$name, , drop = FALSE]
#> # A tibble: 1 × 2
#>   name  band  
#>   <chr> <chr> 
#> 1 Mick  Stones

Semi and anti joins with multiple key variables are considerably more challenging to implement.

dplyr/inst/doc/dplyr.Rmd0000644000176200001440000003304114366556340014741 0ustar liggesusers--- title: "Introduction to dplyr" output: rmarkdown::html_vignette description: > Start here if this is your first time using dplyr. You'll learn the basic philosophy, the most important data manipulation verbs, and the pipe, `%>%`, which allows you to combine multiple verbs together to solve real problems. vignette: > %\VignetteIndexEntry{Introduction to dplyr} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) ``` When working with data you must: * Figure out what you want to do. * Describe those tasks in the form of a computer program. * Execute the program. The dplyr package makes these steps fast and easy: * By constraining your options, it helps you think about your data manipulation challenges. * It provides simple "verbs", functions that correspond to the most common data manipulation tasks, to help you translate your thoughts into code. * It uses efficient backends, so you spend less time waiting for the computer. This document introduces you to dplyr's basic set of tools, and shows you how to apply them to data frames. dplyr also supports databases via the dbplyr package, once you've installed, read `vignette("dbplyr")` to learn more. ## Data: starwars To explore the basic data manipulation verbs of dplyr, we'll use the dataset `starwars`. This dataset contains `r nrow(starwars)` characters and comes from the [Star Wars API](https://swapi.dev), and is documented in `?starwars` ```{r} dim(starwars) starwars ``` Note that `starwars` is a tibble, a modern reimagining of the data frame. It's particularly useful for large datasets because it only prints the first few rows. You can learn more about tibbles at ; in particular you can convert data frames to tibbles with `as_tibble()`. 
## Single table verbs dplyr aims to provide a function for each basic verb of data manipulation. These verbs can be organised into three categories based on the component of the dataset that they work with: * Rows: * `filter()` chooses rows based on column values. * `slice()` chooses rows based on location. * `arrange()` changes the order of the rows. * Columns: * `select()` changes whether or not a column is included. * `rename()` changes the name of columns. * `mutate()` changes the values of columns and creates new columns. * `relocate()` changes the order of the columns. * Groups of rows: * `summarise()` collapses a group into a single row. ### The pipe All of the dplyr functions take a data frame (or tibble) as the first argument. Rather than forcing the user to either save intermediate objects or nest functions, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so the result from one step is then "piped" into the next step. You can use the pipe to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"). ### Filter rows with `filter()` `filter()` allows you to select a subset of rows in a data frame. Like all single verbs, the first argument is the tibble (or data frame). The second and subsequent arguments refer to variables within that data frame, selecting rows where the expression is `TRUE`. For example, we can select all character with light skin color and brown eyes with: ```{r} starwars %>% filter(skin_color == "light", eye_color == "brown") ``` This is roughly equivalent to this base R code: ```{r, eval = FALSE} starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ] ``` ### Arrange rows with `arrange()` `arrange()` works similarly to `filter()` except that instead of filtering or selecting rows, it reorders them. It takes a data frame, and a set of column names (or more complicated expressions) to order by. 
If you provide more than one column name, each additional column will be used to break ties in the values of preceding columns: ```{r} starwars %>% arrange(height, mass) ``` Use `desc()` to order a column in descending order: ```{r} starwars %>% arrange(desc(height)) ``` ### Choose rows using their position with `slice()` `slice()` lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows. We can get characters from row numbers 5 through 10. ```{r} starwars %>% slice(5:10) ``` It is accompanied by a number of helpers for common use cases: * `slice_head()` and `slice_tail()` select the first or last rows. ```{r} starwars %>% slice_head(n = 3) ``` * `slice_sample()` randomly selects rows. Use the option prop to choose a certain proportion of the cases. ```{r} starwars %>% slice_sample(n = 5) starwars %>% slice_sample(prop = 0.1) ``` Use `replace = TRUE` to perform a bootstrap sample. If needed, you can weight the sample with the `weight` argument. * `slice_min()` and `slice_max()` select rows with highest or lowest values of a variable. Note that we first must choose only the values which are not NA. ```{r} starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) ``` ### Select columns with `select()` Often you work with large datasets with many columns but only a few are actually of interest to you. 
`select()` allows you to rapidly zoom in on a useful subset using operations that usually only work on numeric variable positions: ```{r} # Select columns by name starwars %>% select(hair_color, skin_color, eye_color) # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) # Select all columns ending with color starwars %>% select(ends_with("color")) ``` There are a number of helper functions you can use within `select()`, like `starts_with()`, `ends_with()`, `matches()` and `contains()`. These let you quickly match larger blocks of variables that meet some criterion. See `?select` for more details. You can rename variables with `select()` by using named arguments: ```{r} starwars %>% select(home_world = homeworld) ``` But because `select()` drops all the variables not explicitly mentioned, it's not that useful. Instead, use `rename()`: ```{r} starwars %>% rename(home_world = homeworld) ``` ### Add new columns with `mutate()` Besides selecting sets of existing columns, it's often useful to add new columns that are functions of existing columns. This is the job of `mutate()`: ```{r} starwars %>% mutate(height_m = height / 100) ``` We can't see the height in meters we just calculated, but we can fix that using a select command. 
```{r} starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) ``` `dplyr::mutate()` is similar to the base `transform()`, but allows you to refer to columns that you've just created: ```{r} starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) ``` If you only want to keep the new variables, use `.keep = "none"`: ```{r} starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2), .keep = "none" ) ``` ### Change column order with `relocate()` Use a similar syntax as `select()` to move blocks of columns at once ```{r} starwars %>% relocate(sex:homeworld, .before = height) ``` ### Summarise values with `summarise()` The last verb is `summarise()`. It collapses a data frame to a single row. ```{r} starwars %>% summarise(height = mean(height, na.rm = TRUE)) ``` It's not that useful until we learn the `group_by()` verb below. ### Commonalities You may have noticed that the syntax and function of all these verbs are very similar: * The first argument is a data frame. * The subsequent arguments describe what to do with the data frame. You can refer to columns in the data frame directly without using `$`. * The result is a new data frame Together these properties make it easy to chain together multiple simple steps to achieve a complex result. These five functions provide the basis of a language of data manipulation. At the most basic level, you can only alter a tidy data frame in five useful ways: you can reorder the rows (`arrange()`), pick observations and variables of interest (`filter()` and `select()`), add new variables that are functions of existing variables (`mutate()`), or collapse many values to a summary (`summarise()`). ## Combining functions with `%>%` The dplyr API is functional in the sense that function calls don't have side-effects. You must always save their results. 
This doesn't lead to particularly elegant code, especially if you want to do many operations at once. You either have to do it step-by-step: ```{r, eval = FALSE} a1 <- group_by(starwars, species, sex) a2 <- select(a1, height, mass) a3 <- summarise(a2, height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` Or if you don't want to name the intermediate results, you need to wrap the function calls inside each other: ```{r} summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` This is difficult to read because the order of the operations is from inside to out. Thus, the arguments are a long way away from the function. To get around this problem, dplyr provides the `%>%` operator from magrittr. `x %>% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations that you can read left-to-right, top-to-bottom (reading the pipe operator as "then"): ```{r, eval = FALSE} starwars %>% group_by(species, sex) %>% select(height, mass) %>% summarise( height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ``` ## Patterns of operations The dplyr verbs can be classified by the type of operations they accomplish (we sometimes speak of their **semantics**, i.e., their meaning). It's helpful to have a good grasp of the difference between select and mutate operations. ### Selecting operations One of the appealing features of dplyr is that you can refer to columns from the tibble as if they were regular variables. However, the syntactic uniformity of referring to bare column names hides semantical differences across the verbs. A column symbol supplied to `select()` does not have the same meaning as the same symbol supplied to `mutate()`. Selecting operations expect column names and positions. Hence, when you call `select()` with bare variable names, they actually represent their own positions in the tibble. 
The following calls are completely equivalent from dplyr's point of view: ```{r} # `name` represents the integer 1 select(starwars, name) select(starwars, 1) ``` By the same token, this means that you cannot refer to variables from the surrounding context if they have the same name as one of the columns. In the following example, `height` still represents 2, not 5: ```{r} height <- 5 select(starwars, height) ``` One useful subtlety is that this only applies to bare names and to selecting calls like `c(height, mass)` or `height:mass`. In all other cases, the columns of the data frame are not put in scope. This allows you to refer to contextual variables in selection helpers: ```{r} name <- "color" select(starwars, ends_with(name)) ``` These semantics are usually intuitive. But note the subtle difference: ```{r} name <- 5 select(starwars, name, identity(name)) ``` In the first argument, `name` represents its own position `1`. In the second argument, `name` is evaluated in the surrounding context and represents the fifth column. For a long time, `select()` used to only understand column positions. Counting from dplyr 0.6, it now understands column names as well. This makes it a bit easier to program with `select()`: ```{r} vars <- c("name", "height") select(starwars, all_of(vars), "mass") ``` ### Mutating operations Mutate semantics are quite different from selection semantics. Whereas `select()` expects column names or positions, `mutate()` expects *column vectors*. We will set up a smaller tibble to use for our examples. ```{r} df <- starwars %>% select(name, height, mass) ``` When we use `select()`, the bare column names stand for their own positions in the tibble. For `mutate()` on the other hand, column symbols represent the actual column vectors stored in the tibble. Consider what happens if we give a string or a number to `mutate()`: ```{r} mutate(df, "height", 2) ``` `mutate()` gets length-1 vectors that it interprets as new columns in the data frame. 
These vectors are recycled so they match the number of rows. That's why it doesn't make sense to supply expressions like `"height" + 10` to `mutate()`. This amounts to adding 10 to a string! The correct expression is: ```{r} mutate(df, height + 10) ``` In the same way, you can unquote values from the context if these values represent a valid column. They must be either length 1 (they then get recycled) or have the same length as the number of rows. In the following example we create a new vector that we add to the data frame: ```{r} var <- seq(1, nrow(df)) mutate(df, new = var) ``` A case in point is `group_by()`. While you might think it has select semantics, it actually has mutate semantics. This is quite handy as it allows to group by a modified column: ```{r} group_by(starwars, sex) group_by(starwars, sex = as.factor(sex)) group_by(starwars, height_binned = cut(height, 3)) ``` This is why you can't supply a column name to `group_by()`. This amounts to creating a new column containing the string recycled to the number of rows: ```{r} group_by(df, "month") ``` dplyr/inst/doc/colwise.html0000644000176200001440000015750414525507101015475 0ustar liggesusers Column-wise operations

Column-wise operations

It’s often useful to perform the same operation on multiple columns, but copying and pasting is both tedious and error prone:

df %>% 
  group_by(g1, g2) %>% 
  summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d))

(If you’re trying to compute mean(a, b, c, d) for each row, instead see vignette("rowwise"))

This vignette will introduce you to the across() function, which lets you rewrite the previous code more succinctly:

df %>% 
  group_by(g1, g2) %>% 
  summarise(across(a:d, mean))

We’ll start by discussing the basic usage of across(), particularly as it applies to summarise(), and show how to use it with multiple functions. We’ll then show a few uses with other verbs. We’ll finish off with a bit of history, showing why we prefer across() to our last approach (the _if(), _at() and _all() functions) and how to translate your old code to the new syntax.

library(dplyr, warn.conflicts = FALSE)

Basic usage

across() has two primary arguments:

  • The first argument, .cols, selects the columns you want to operate on. It uses tidy selection (like select()) so you can pick variables by position, name, and type.

  • The second argument, .fns, is a function or list of functions to apply to each column. This can also be a purrr style formula (or list of formulas) like ~ .x / 2. (This argument is optional, and you can omit it if you just want to get the underlying data; you’ll see that technique used in vignette("rowwise").)

Here are a couple of examples of across() in conjunction with its favourite verb, summarise(). But you can use across() with any dplyr verb, as you’ll see a little later.

starwars %>% 
  summarise(across(where(is.character), n_distinct))
#> # A tibble: 1 × 8
#>    name hair_color skin_color eye_color   sex gender homeworld species
#>   <int>      <int>      <int>     <int> <int>  <int>     <int>   <int>
#> 1    87         12         31        15     5      3        49      38

starwars %>% 
  group_by(species) %>% 
  filter(n() > 1) %>% 
  summarise(across(c(sex, gender, homeworld), n_distinct))
#> # A tibble: 9 × 4
#>   species    sex gender homeworld
#>   <chr>    <int>  <int>     <int>
#> 1 Droid        1      2         3
#> 2 Gungan       1      1         1
#> 3 Human        2      2        15
#> 4 Kaminoan     2      2         1
#> # ℹ 5 more rows

starwars %>% 
  group_by(homeworld) %>% 
  filter(n() > 1) %>% 
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))
#> # A tibble: 10 × 4
#>   homeworld height  mass birth_year
#>   <chr>      <dbl> <dbl>      <dbl>
#> 1 Alderaan    176.  64         43  
#> 2 Corellia    175   78.5       25  
#> 3 Coruscant   174.  50         91  
#> 4 Kamino      208.  83.1       31.5
#> # ℹ 6 more rows

Because across() is usually used in combination with summarise() and mutate(), it doesn’t select grouping variables in order to avoid accidentally modifying them:

df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9))
df %>% 
  group_by(g) %>% 
  summarise(across(where(is.numeric), sum))
#> # A tibble: 2 × 3
#>       g     x     y
#>   <dbl> <dbl> <dbl>
#> 1     1     0    -5
#> 2     2     3    -9

Multiple functions

You can transform each variable with more than one function by supplying a named list of functions or lambda functions in the second argument:

min_max <- list(
  min = ~min(.x, na.rm = TRUE), 
  max = ~max(.x, na.rm = TRUE)
)
starwars %>% summarise(across(where(is.numeric), min_max))
#> # A tibble: 1 × 6
#>   height_min height_max mass_min mass_max birth_year_min birth_year_max
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896
starwars %>% summarise(across(c(height, mass, birth_year), min_max))
#> # A tibble: 1 × 6
#>   height_min height_max mass_min mass_max birth_year_min birth_year_max
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896

Control how the names are created with the .names argument which takes a glue spec:

starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}"))
#> # A tibble: 1 × 6
#>   min.height max.height min.mass max.mass min.birth_year max.birth_year
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896
starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}"))
#> # A tibble: 1 × 6
#>   min.height max.height min.mass max.mass min.birth_year max.birth_year
#>        <int>      <int>    <dbl>    <dbl>          <dbl>          <dbl>
#> 1         66        264       15     1358              8            896

If you’d prefer all summaries with the same function to be grouped together, you’ll have to expand the calls yourself:

starwars %>% summarise(
  across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"),
  across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}")
)
#> # A tibble: 1 × 6
#>   min_height min_mass min_birth_year max_height max_mass max_birth_year
#>        <int>    <dbl>          <dbl>      <int>    <dbl>          <dbl>
#> 1         66       15              8        264     1358            896

(One day this might become an argument to across() but we’re not yet sure how it would work.)

We cannot however use where(is.numeric) in that last case because the second across() would pick up the variables that were newly created (“min_height”, “min_mass” and “min_birth_year”).

We can work around this by combining both calls to across() into a single expression that returns a tibble:

starwars %>% summarise(
  tibble(
    across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"),
    across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}")  
  )
)
#> # A tibble: 1 × 6
#>   min_height min_mass min_birth_year max_height max_mass max_birth_year
#>        <int>    <dbl>          <dbl>      <int>    <dbl>          <dbl>
#> 1         66       15              8        264     1358            896

Alternatively we could reorganize results with relocate():

starwars %>% 
  summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% 
  relocate(starts_with("min"))
#> # A tibble: 1 × 6
#>   min.height min.mass min.birth_year max.height max.mass max.birth_year
#>        <int>    <dbl>          <dbl>      <int>    <dbl>          <dbl>
#> 1         66       15              8        264     1358            896

Current column

If you need to, you can access the name of the “current” column inside by calling cur_column(). This can be useful if you want to perform some sort of context dependent transformation that’s already encoded in a vector:

df <- tibble(x = 1:3, y = 3:5, z = 5:7)
mult <- list(x = 1, y = 10, z = 100)

df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]]))
#> # A tibble: 3 × 3
#>       x     y     z
#>   <dbl> <dbl> <dbl>
#> 1     1    30   500
#> 2     2    40   600
#> 3     3    50   700

Gotchas

Be careful when combining numeric summaries with where(is.numeric):

df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9))

df %>% 
  summarise(n = n(), across(where(is.numeric), sd))
#>    n x        y
#> 1 NA 1 4.041452

Here n becomes NA because n is numeric, so the across() computes its standard deviation, and the standard deviation of 3 (a constant) is NA. You probably want to compute n() last to avoid this problem:

df %>% 
  summarise(across(where(is.numeric), sd), n = n())
#>   x        y n
#> 1 1 4.041452 3

Alternatively, you could explicitly exclude n from the columns to operate on:

df %>% 
  summarise(n = n(), across(where(is.numeric) & !n, sd))
#>   n x        y
#> 1 3 1 4.041452

Another approach is to combine both the call to n() and across() in a single expression that returns a tibble:

df %>% 
  summarise(
    tibble(n = n(), across(where(is.numeric), sd))
  )
#>   n x        y
#> 1 3 1 4.041452

Other verbs

So far we’ve focused on the use of across() with summarise(), but it works with any other dplyr verb that uses data masking:

  • Rescale all numeric variables to range 0-1:

    rescale01 <- function(x) {
      rng <- range(x, na.rm = TRUE)
      (x - rng[1]) / (rng[2] - rng[1])
    }
    df <- tibble(x = 1:4, y = rnorm(4))
    df %>% mutate(across(where(is.numeric), rescale01))
    #> # A tibble: 4 × 2
    #>       x     y
    #>   <dbl> <dbl>
    #> 1 0     0.385
    #> 2 0.333 1    
    #> 3 0.667 0    
    #> 4 1     0.903

For some verbs, like group_by(), count() and distinct(), you don’t need to supply a summary function, but it can be useful to use tidy-selection to dynamically select a set of columns. In those cases, we recommend using the complement to across(), pick(), which works like across() but doesn’t apply any functions and instead returns a data frame containing the selected columns.

  • Find all distinct

    starwars %>% distinct(pick(contains("color")))
    #> # A tibble: 67 × 3
    #>   hair_color skin_color  eye_color
    #>   <chr>      <chr>       <chr>    
    #> 1 blond      fair        blue     
    #> 2 <NA>       gold        yellow   
    #> 3 <NA>       white, blue red      
    #> 4 none       white       yellow   
    #> # ℹ 63 more rows
  • Count all combinations of variables with a given pattern:

    starwars %>% count(pick(contains("color")), sort = TRUE)
    #> # A tibble: 67 × 4
    #>   hair_color skin_color eye_color     n
    #>   <chr>      <chr>      <chr>     <int>
    #> 1 brown      light      brown         6
    #> 2 brown      fair       blue          4
    #> 3 none       grey       black         4
    #> 4 black      dark       brown         3
    #> # ℹ 63 more rows

across() doesn’t work with select() or rename() because they already use tidy select syntax; if you want to transform column names with a function, you can use rename_with().

filter()

We cannot directly use across() in filter() because we need an extra step to combine the results. To that end, filter() has two special purpose companion functions:

  • if_any() keeps the rows where the predicate is true for at least one selected column:
starwars %>% 
  filter(if_any(everything(), ~ !is.na(.x)))
#> # A tibble: 87 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 C-3PO        167    75 <NA>       gold       yellow         112   none  mascu…
#> 3 R2-D2         96    32 <NA>       white, bl… red             33   none  mascu…
#> 4 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> # ℹ 83 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>
  • if_all() keeps the rows where the predicate is true for all selected columns:
starwars %>% 
  filter(if_all(everything(), ~ !is.na(.x)))
#> # A tibble: 29 × 14
#>   name      height  mass hair_color skin_color eye_color birth_year sex   gender
#>   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
#> 1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
#> 2 Darth Va…    202   136 none       white      yellow          41.9 male  mascu…
#> 3 Leia Org…    150    49 brown      light      brown           19   fema… femin…
#> 4 Owen Lars    178   120 brown, gr… light      blue            52   male  mascu…
#> # ℹ 25 more rows
#> # ℹ 5 more variables: homeworld <chr>, species <chr>, films <list>,
#> #   vehicles <list>, starships <list>

_if, _at, _all

Prior versions of dplyr allowed you to apply a function to multiple columns in a different way: using functions with _if(), _at(), and _all() suffixes. These functions solved a pressing need and are used by many people, but are now superseded. That means that they’ll stay around, but won’t receive any new features and will only get critical bug fixes.

Why do we like across()?

Why did we decide to move away from these functions in favour of across()?

  1. across() makes it possible to express useful summaries that were previously impossible:

    df %>%
      group_by(g1, g2) %>% 
      summarise(
        across(where(is.numeric), mean), 
        across(where(is.factor), nlevels),
        n = n(), 
      )
  2. across() reduces the number of functions that dplyr needs to provide. This makes dplyr easier for you to use (because there are fewer functions to remember) and easier for us to implement new verbs (since we only need to implement one function, not four).

  3. across() unifies _if and _at semantics so that you can select by position, name, and type, and you can now create compound selections that were previously impossible. For example, you can now transform all numeric columns whose name begins with “x”: across(where(is.numeric) & starts_with("x")).

  4. across() doesn’t need to use vars(). The _at() functions are the only place in dplyr where you have to manually quote variable names, which makes them a little weird and hence harder to remember.

Why did it take so long to discover across()?

It’s disappointing that we didn’t discover across() earlier, and instead worked through several false starts (first not realising that it was a common problem, then with the _each() functions, and most recently with the _if()/_at()/_all() functions). But across() couldn’t work without three recent discoveries:

  • You can have a column of a data frame that is itself a data frame. This is something provided by base R, but it’s not very well documented, and it took a while to see that it was useful, not just a theoretical curiosity.

  • We can use data frames to allow summary functions to return multiple columns.

  • We can use the absence of an outer name as a convention that you want to unpack a data frame column into individual columns.

How do you convert existing code?

Fortunately, it’s generally straightforward to translate your existing code to use across():

  • Strip the _if(), _at() and _all() suffix off the function.

  • Call across(). The first argument will be:

    1. For _if(), the old second argument wrapped in where().
    2. For _at(), the old second argument, with the call to vars() removed.
    3. For _all(), everything().

    The subsequent arguments can be copied as is.

For example:

df %>% mutate_if(is.numeric, ~mean(.x, na.rm = TRUE))
# ->
df %>% mutate(across(where(is.numeric), ~mean(.x, na.rm = TRUE)))

df %>% mutate_at(vars(c(x, starts_with("y"))), mean)
# ->
df %>% mutate(across(c(x, starts_with("y")), mean))

df %>% mutate_all(mean)
# ->
df %>% mutate(across(everything(), mean))

There are a few exceptions to this rule:

  • rename_*() and select_*() follow a different pattern. They already have select semantics, so are generally used in a different way that doesn’t have a direct equivalent with across(); use the new rename_with() instead.

  • Previously, filter_*() were paired with the all_vars() and any_vars() helpers. The new helpers if_any() and if_all() can be used inside filter() to keep rows for which the predicate is true for at least one, or all selected columns:

    df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1))
    
    # Find all rows where EVERY numeric variable is greater than zero
    df %>% filter(if_all(where(is.numeric), ~ .x > 0))
    #> # A tibble: 1 × 3
    #>   x         y     z
    #>   <chr> <dbl> <dbl>
    #> 1 b         1     1
    
    # Find all rows where ANY numeric variable is greater than zero
    df %>% filter(if_any(where(is.numeric), ~ .x > 0))
    #> # A tibble: 2 × 3
    #>   x         y     z
    #>   <chr> <dbl> <dbl>
    #> 1 a         1    -1
    #> 2 b         1     1
  • When used in a mutate(), all transformations performed by an across() are applied at once. This is different to the behaviour of mutate_if(), mutate_at(), and mutate_all(), which apply the transformations one at a time. We expect that you’ll generally find the new behaviour less surprising:

    df <- tibble(x = 2, y = 4, z = 8)
    df %>% mutate_all(~ .x / y)
    #> # A tibble: 1 × 3
    #>       x     y     z
    #>   <dbl> <dbl> <dbl>
    #> 1   0.5     1     8
    
    df %>% mutate(across(everything(), ~ .x / y))
    #> # A tibble: 1 × 3
    #>       x     y     z
    #>   <dbl> <dbl> <dbl>
    #> 1   0.5     1     2
dplyr/inst/doc/two-table.html0000644000176200001440000012435114525507107015726 0ustar liggesusers Two-table verbs

Two-table verbs

It’s rare that a data analysis involves only a single table of data. In practice, you’ll normally have many tables that contribute to an analysis, and you need flexible tools to combine them. In dplyr, there are three families of verbs that work with two tables at a time:

  • Mutating joins, which add new variables to one table from matching rows in another.

  • Filtering joins, which filter observations from one table based on whether or not they match an observation in the other table.

  • Set operations, which combine the observations in the data sets as if they were set elements.

(This discussion assumes that you have tidy data, where the rows are observations and the columns are variables. If you’re not familiar with that framework, I’d recommend reading up on it first.)

All two-table verbs work similarly. The first two arguments are x and y, and provide the tables to combine. The output is always a new table with the same type as x.

Mutating joins

Mutating joins allow you to combine variables from multiple tables. For example, consider the flights and airlines data from the nycflights13 package. In one table we have flight information with an abbreviation for carrier, and in another we have a mapping between abbreviations and full names. You can use a join to add the carrier names to the flight data:

library(nycflights13)
# Drop unimportant variables so it's easier to understand the join results.
flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier)

flights2 %>% 
  left_join(airlines)
#> Joining with `by = join_by(carrier)`
#> # A tibble: 336,776 × 9
#>    year month   day  hour origin dest  tailnum carrier name                  
#>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>                 
#> 1  2013     1     1     5 EWR    IAH   N14228  UA      United Air Lines Inc. 
#> 2  2013     1     1     5 LGA    IAH   N24211  UA      United Air Lines Inc. 
#> 3  2013     1     1     5 JFK    MIA   N619AA  AA      American Airlines Inc.
#> 4  2013     1     1     5 JFK    BQN   N804JB  B6      JetBlue Airways       
#> 5  2013     1     1     6 LGA    ATL   N668DN  DL      Delta Air Lines Inc.  
#> # ℹ 336,771 more rows

Controlling how the tables are matched

As well as x and y, each mutating join takes an argument by that controls which variables are used to match observations in the two tables. There are a few ways to specify it, as I illustrate below with various tables from nycflights13:

  • NULL, the default. dplyr will use all variables that appear in both tables, a natural join. For example, the flights and weather tables match on their common variables: year, month, day, hour and origin.

    flights2 %>% left_join(weather)
    #> Joining with `by = join_by(year, month, day, hour, origin)`
    #> # A tibble: 336,776 × 18
    #>    year month   day  hour origin dest  tailnum carrier  temp  dewp humid
    #>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <dbl> <dbl> <dbl>
    #> 1  2013     1     1     5 EWR    IAH   N14228  UA       39.0  28.0  64.4
    #> 2  2013     1     1     5 LGA    IAH   N24211  UA       39.9  25.0  54.8
    #> 3  2013     1     1     5 JFK    MIA   N619AA  AA       39.0  27.0  61.6
    #> 4  2013     1     1     5 JFK    BQN   N804JB  B6       39.0  27.0  61.6
    #> 5  2013     1     1     6 LGA    ATL   N668DN  DL       39.9  25.0  54.8
    #> # ℹ 336,771 more rows
    #> # ℹ 7 more variables: wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>,
    #> #   precip <dbl>, pressure <dbl>, visib <dbl>, time_hour <dttm>
  • A character vector, by = "x". Like a natural join, but uses only some of the common variables. For example, flights and planes have year columns, but they mean different things so we only want to join by tailnum.

    flights2 %>% left_join(planes, by = "tailnum")
    #> # A tibble: 336,776 × 16
    #>   year.x month   day  hour origin dest  tailnum carrier year.y type             
    #>    <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>    <int> <chr>            
    #> 1   2013     1     1     5 EWR    IAH   N14228  UA        1999 Fixed wing multi…
    #> 2   2013     1     1     5 LGA    IAH   N24211  UA        1998 Fixed wing multi…
    #> 3   2013     1     1     5 JFK    MIA   N619AA  AA        1990 Fixed wing multi…
    #> 4   2013     1     1     5 JFK    BQN   N804JB  B6        2012 Fixed wing multi…
    #> 5   2013     1     1     6 LGA    ATL   N668DN  DL        1991 Fixed wing multi…
    #> # ℹ 336,771 more rows
    #> # ℹ 6 more variables: manufacturer <chr>, model <chr>, engines <int>,
    #> #   seats <int>, speed <int>, engine <chr>

    Note that the year columns in the output are disambiguated with a suffix.

  • A named character vector: by = c("x" = "a"). This will match variable x in table x to variable a in table y. The variables from x will be used in the output.

    Each flight has an origin and destination airport, so we need to specify which one we want to join to:

    flights2 %>% left_join(airports, c("dest" = "faa"))
    #> # A tibble: 336,776 × 15
    #>    year month   day  hour origin dest  tailnum carrier name      lat   lon   alt
    #>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>   <dbl> <dbl> <dbl>
    #> 1  2013     1     1     5 EWR    IAH   N14228  UA      George…  30.0 -95.3    97
    #> 2  2013     1     1     5 LGA    IAH   N24211  UA      George…  30.0 -95.3    97
    #> 3  2013     1     1     5 JFK    MIA   N619AA  AA      Miami …  25.8 -80.3     8
    #> 4  2013     1     1     5 JFK    BQN   N804JB  B6      <NA>     NA    NA      NA
    #> 5  2013     1     1     6 LGA    ATL   N668DN  DL      Hartsf…  33.6 -84.4  1026
    #> # ℹ 336,771 more rows
    #> # ℹ 3 more variables: tz <dbl>, dst <chr>, tzone <chr>
    flights2 %>% left_join(airports, c("origin" = "faa"))
    #> # A tibble: 336,776 × 15
    #>    year month   day  hour origin dest  tailnum carrier name      lat   lon   alt
    #>   <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>   <chr>   <dbl> <dbl> <dbl>
    #> 1  2013     1     1     5 EWR    IAH   N14228  UA      Newark…  40.7 -74.2    18
    #> 2  2013     1     1     5 LGA    IAH   N24211  UA      La Gua…  40.8 -73.9    22
    #> 3  2013     1     1     5 JFK    MIA   N619AA  AA      John F…  40.6 -73.8    13
    #> 4  2013     1     1     5 JFK    BQN   N804JB  B6      John F…  40.6 -73.8    13
    #> 5  2013     1     1     6 LGA    ATL   N668DN  DL      La Gua…  40.8 -73.9    22
    #> # ℹ 336,771 more rows
    #> # ℹ 3 more variables: tz <dbl>, dst <chr>, tzone <chr>

Types of join

There are four types of mutating join, which differ in their behaviour when a match is not found. We’ll illustrate each with a simple example:

df1 <- tibble(x = c(1, 2), y = 2:1)
df2 <- tibble(x = c(3, 1), a = 10, b = "a")
  • inner_join(x, y) only includes observations that match in both x and y.

    df1 %>% inner_join(df2) %>% knitr::kable()
    #> Joining with `by = join_by(x)`
    x y a b
    1 2 10 a
  • left_join(x, y) includes all observations in x, regardless of whether they match or not. This is the most commonly used join because it ensures that you don’t lose observations from your primary table.

    df1 %>% left_join(df2)
    #> Joining with `by = join_by(x)`
    #> # A tibble: 2 × 4
    #>       x     y     a b    
    #>   <dbl> <int> <dbl> <chr>
    #> 1     1     2    10 a    
    #> 2     2     1    NA <NA>
  • right_join(x, y) includes all observations in y. It’s equivalent to left_join(y, x), but the columns and rows will be ordered differently.

    df1 %>% right_join(df2)
    #> Joining with `by = join_by(x)`
    #> # A tibble: 2 × 4
    #>       x     y     a b    
    #>   <dbl> <int> <dbl> <chr>
    #> 1     1     2    10 a    
    #> 2     3    NA    10 a
    df2 %>% left_join(df1)
    #> Joining with `by = join_by(x)`
    #> # A tibble: 2 × 4
    #>       x     a b         y
    #>   <dbl> <dbl> <chr> <int>
    #> 1     3    10 a        NA
    #> 2     1    10 a         2
  • full_join() includes all observations from x and y.

    df1 %>% full_join(df2)
    #> Joining with `by = join_by(x)`
    #> # A tibble: 3 × 4
    #>       x     y     a b    
    #>   <dbl> <int> <dbl> <chr>
    #> 1     1     2    10 a    
    #> 2     2     1    NA <NA> 
    #> 3     3    NA    10 a

The left, right and full joins are collectively known as outer joins. When a row doesn’t match in an outer join, the new variables are filled in with missing values.

Observations

While mutating joins are primarily used to add new variables, they can also generate new observations. If a match is not unique, a join will add all possible combinations (the Cartesian product) of the matching observations:

df1 <- tibble(x = c(1, 1, 2), y = 1:3)
df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a"))

df1 %>% left_join(df2)
#> Joining with `by = join_by(x)`
#> Warning in left_join(., df2): Detected an unexpected many-to-many relationship between `x` and `y`.
#> ℹ Row 1 of `x` matches multiple rows in `y`.
#> ℹ Row 1 of `y` matches multiple rows in `x`.
#> ℹ If a many-to-many relationship is expected, set `relationship =
#>   "many-to-many"` to silence this warning.
#> # A tibble: 5 × 3
#>       x     y z    
#>   <dbl> <int> <chr>
#> 1     1     1 a    
#> 2     1     1 b    
#> 3     1     2 a    
#> 4     1     2 b    
#> 5     2     3 a

Filtering joins

Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. There are two types:

  • semi_join(x, y) keeps all observations in x that have a match in y.
  • anti_join(x, y) drops all observations in x that have a match in y.

These are most useful for diagnosing join mismatches. For example, there are many flights in the nycflights13 dataset that don’t have a matching tail number in the planes table:

library("nycflights13")
flights %>% 
  anti_join(planes, by = "tailnum") %>% 
  count(tailnum, sort = TRUE)
#> # A tibble: 722 × 2
#>   tailnum     n
#>   <chr>   <int>
#> 1 <NA>     2512
#> 2 N725MQ    575
#> 3 N722MQ    513
#> 4 N723MQ    507
#> 5 N713MQ    483
#> # ℹ 717 more rows

If you’re worried about what observations your joins will match, start with a semi_join() or anti_join(). semi_join() and anti_join() never duplicate; they only ever remove observations.

df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4)
df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a"))

# Four rows to start with:
df1 %>% nrow()
#> [1] 4
# And we get four rows after the join
df1 %>% inner_join(df2, by = "x") %>% nrow()
#> Warning in inner_join(., df2, by = "x"): Detected an unexpected many-to-many relationship between `x` and `y`.
#> ℹ Row 1 of `x` matches multiple rows in `y`.
#> ℹ Row 1 of `y` matches multiple rows in `x`.
#> ℹ If a many-to-many relationship is expected, set `relationship =
#>   "many-to-many"` to silence this warning.
#> [1] 4
# But only two rows actually match
df1 %>% semi_join(df2, by = "x") %>% nrow()
#> [1] 2

Set operations

The final type of two-table verb is set operations. These expect the x and y inputs to have the same variables, and treat the observations like sets:

  • intersect(x, y): return only observations in both x and y
  • union(x, y): return unique observations in x and y
  • setdiff(x, y): return observations in x, but not in y.

Given this simple data:

(df1 <- tibble(x = 1:2, y = c(1L, 1L)))
#> # A tibble: 2 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
#> 2     2     1
(df2 <- tibble(x = 1:2, y = 1:2))
#> # A tibble: 2 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
#> 2     2     2

The four possibilities are:

intersect(df1, df2)
#> # A tibble: 1 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
# Note that we get 3 rows, not 4
union(df1, df2)
#> # A tibble: 3 × 2
#>       x     y
#>   <int> <int>
#> 1     1     1
#> 2     2     1
#> 3     2     2
setdiff(df1, df2)
#> # A tibble: 1 × 2
#>       x     y
#>   <int> <int>
#> 1     2     1
setdiff(df2, df1)
#> # A tibble: 1 × 2
#>       x     y
#>   <int> <int>
#> 1     2     2

Multiple-table verbs

dplyr does not provide any functions for working with three or more tables. Instead use purrr::reduce() or Reduce(), as described in Advanced R, to iteratively combine the two-table verbs to handle as many tables as you need.

dplyr/inst/doc/window-functions.html0000644000176200001440000010410414525507110017331 0ustar liggesusers Window functions

Window functions

A window function is a variation on an aggregation function. Where an aggregation function, like sum() and mean(), takes n inputs and returns a single value, a window function returns n values. The output of a window function depends on all its input values, so window functions don’t include functions that work element-wise, like + or round(). Window functions include variations on aggregate functions, like cumsum() and cummean(), functions for ranking and ordering, like rank(), and functions for taking offsets, like lead() and lag().

In this vignette, we’ll use a small sample of the Lahman batting dataset, including the players that have won an award.

library(Lahman)

batting <- Lahman::Batting %>%
  as_tibble() %>%
  select(playerID, yearID, teamID, G, AB:H) %>%
  arrange(playerID, yearID, teamID) %>%
  semi_join(Lahman::AwardsPlayers, by = "playerID")

players <- batting %>% group_by(playerID)

Window functions are used in conjunction with mutate() and filter() to solve a wide range of problems. Here’s a selection:

# For each player, find the two years with most hits
filter(players, min_rank(desc(H)) <= 2 & H > 0)
# Within each player, rank each year by the number of games played
mutate(players, G_rank = min_rank(G))

# For each player, find every year that was better than the previous year
filter(players, G > lag(G))
# For each player, compute avg change in games played per year
mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID)))

# For each player, find all years where they played more games than they did on average
filter(players, G > mean(G))
# For each, player compute a z score based on number of games played
mutate(players, G_z = (G - mean(G)) / sd(G))

Before reading this vignette, you should be familiar with mutate() and filter().

Types of window functions

There are five main families of window functions. Two families are unrelated to aggregation functions:

  • Ranking and ordering functions: row_number(), min_rank(), dense_rank(), cume_dist(), percent_rank(), and ntile(). These functions all take a vector to order by, and return various types of ranks.

  • Offsets lead() and lag() allow you to access the previous and next values in a vector, making it easy to compute differences and trends.

The other three families are variations on familiar aggregate functions:

  • Cumulative aggregates: cumsum(), cummin(), cummax() (from base R), and cumall(), cumany(), and cummean() (from dplyr).

  • Rolling aggregates operate in a fixed width window. You won’t find them in base R or in dplyr, but there are many implementations in other packages, such as RcppRoll.

  • Recycled aggregates, where an aggregate is repeated to match the length of the input. These are not needed in R because vector recycling automatically recycles aggregates where needed. They are important in SQL, because the presence of an aggregation function usually tells the database to return only one row per group.

Each family is described in more detail below, focussing on the general goals and how to use them with dplyr. For more details, refer to the individual function documentation.

Ranking functions

The ranking functions are variations on a theme, differing in how they handle ties:

x <- c(1, 1, 2, 2, 2)

row_number(x)
#> [1] 1 2 3 4 5
min_rank(x)
#> [1] 1 1 3 3 3
dense_rank(x)
#> [1] 1 1 2 2 2

If you’re familiar with R, you may recognise that row_number() and min_rank() can be computed with the base rank() function and various values of the ties.method argument. These functions are provided to save a little typing, and to make it easier to convert between R and SQL.

Two other ranking functions return numbers between 0 and 1. percent_rank() gives the percentage of the rank; cume_dist() gives the proportion of values less than or equal to the current value.

cume_dist(x)
#> [1] 0.4 0.4 1.0 1.0 1.0
percent_rank(x)
#> [1] 0.0 0.0 0.5 0.5 0.5

These are useful if you want to select (for example) the top 10% of records within each group. For example:

filter(players, cume_dist(desc(G)) < 0.1)
#> # A tibble: 1,090 × 7
#> # Groups:   playerID [995]
#>   playerID  yearID teamID     G    AB     R     H
#>   <chr>      <int> <fct>  <int> <int> <int> <int>
#> 1 aaronha01   1963 ML1      161   631   121   201
#> 2 aaronha01   1968 ATL      160   606    84   174
#> 3 abbotji01   1991 CAL       34     0     0     0
#> 4 abernte02   1965 CHN       84    18     1     3
#> # ℹ 1,086 more rows

Finally, ntile() divides the data up into n evenly sized buckets. It’s a coarse ranking, and it can be used with mutate() to divide the data into buckets for further summary. For example, we could use ntile() to divide the players within a team into four ranked groups, and calculate the average number of games within each group.

by_team_player <- group_by(batting, teamID, playerID)
by_team <- summarise(by_team_player, G = sum(G))
#> `summarise()` has grouped output by 'teamID'. You can override using the
#> `.groups` argument.
by_team_quartile <- group_by(by_team, quartile = ntile(G, 4))
summarise(by_team_quartile, mean(G))
#> # A tibble: 4 × 2
#>   quartile `mean(G)`
#>      <int>     <dbl>
#> 1        1      22.7
#> 2        2      91.8
#> 3        3     253. 
#> 4        4     961.

All ranking functions rank from lowest to highest so that small input values get small ranks. Use desc() to rank from highest to lowest.

Lead and lag

lead() and lag() produce offset versions of an input vector that are either ahead of or behind the original vector.

x <- 1:5
lead(x)
#> [1]  2  3  4  5 NA
lag(x)
#> [1] NA  1  2  3  4

You can use them to:

  • Compute differences or percent changes.

    # Compute the relative change in games played
    mutate(players, G_delta = G - lag(G))

    Using lag() is more convenient than diff() because for n inputs diff() returns n - 1 outputs.

  • Find out when a value changes.

    # Find when a player changed teams
    filter(players, teamID != lag(teamID))

lead() and lag() have an optional argument order_by. If set, instead of using the row order to determine which value comes before another, they will use another variable. This is important if you have not already sorted the data, or you want to sort one way and lag another.

Here’s a simple example of what happens if you don’t specify order_by when you need it:

df <- data.frame(year = 2000:2005, value = (0:5) ^ 2)
scrambled <- df[sample(nrow(df)), ]

wrong <- mutate(scrambled, prev_value = lag(value))
arrange(wrong, year)
#>   year value prev_value
#> 1 2000     0          4
#> 2 2001     1          0
#> 3 2002     4          9
#> 4 2003     9         16
#> 5 2004    16         NA
#> 6 2005    25          1

right <- mutate(scrambled, prev_value = lag(value, order_by = year))
arrange(right, year)
#>   year value prev_value
#> 1 2000     0         NA
#> 2 2001     1          0
#> 3 2002     4          1
#> 4 2003     9          4
#> 5 2004    16          9
#> 6 2005    25         16

Cumulative aggregates

Base R provides cumulative sum (cumsum()), cumulative min (cummin()), and cumulative max (cummax()). (It also provides cumprod() but that is rarely useful). Other common accumulating functions are cumany() and cumall(), cumulative versions of || and &&, and cummean(), a cumulative mean. These are not included in base R, but efficient versions are provided by dplyr.

cumany() and cumall() are useful for selecting all rows up to, or all rows after, a condition is true for the first (or last) time. For example, we can use cumany() to find all records for a player after they played a year with 150 games:

filter(players, cumany(G > 150))

Like lead and lag, you may want to control the order in which the accumulation occurs. None of the built in functions have an order_by argument so dplyr provides a helper: order_by(). You give it the variable you want to order by, and then the call to the window function:

x <- 1:10
y <- 10:1
order_by(y, cumsum(x))
#>  [1] 55 54 52 49 45 40 34 27 19 10

This function uses a bit of non-standard evaluation, so I wouldn’t recommend using it inside another function; use the simpler but less concise with_order() instead.

Recycled aggregates

R’s vector recycling makes it easy to select values that are higher or lower than a summary. I call this a recycled aggregate because the value of the aggregate is recycled to be the same length as the original vector. Recycled aggregates are useful if you want to find all records greater than the mean or less than the median:

filter(players, G > mean(G))
filter(players, G < median(G))

While most SQL databases don’t have an equivalent of median() or quantile(), when filtering you can achieve the same effect with ntile(). For example, x > median(x) is equivalent to ntile(x, 2) == 2; x > quantile(x, 0.75) is equivalent to ntile(x, 100) > 75 or ntile(x, 4) > 3.

filter(players, ntile(G, 2) == 2)

You can also use this idea to select the records with the highest (x == max(x)) or lowest value (x == min(x)) for a field, but the ranking functions give you more control over ties, and allow you to select any number of records.

Recycled aggregates are also useful in conjunction with mutate(). For example, with the batting data, we could compute the “career year”, the number of years a player has played since they entered the league:

mutate(players, career_year = yearID - min(yearID) + 1)
#> # A tibble: 20,874 × 8
#> # Groups:   playerID [1,436]
#>   playerID  yearID teamID     G    AB     R     H career_year
#>   <chr>      <int> <fct>  <int> <int> <int> <int>       <dbl>
#> 1 aaronha01   1954 ML1      122   468    58   131           1
#> 2 aaronha01   1955 ML1      153   602   105   189           2
#> 3 aaronha01   1956 ML1      153   609   106   200           3
#> 4 aaronha01   1957 ML1      151   615   118   198           4
#> # ℹ 20,870 more rows

Or, as in the introductory example, we could compute a z-score:

mutate(players, G_z = (G - mean(G)) / sd(G))
#> # A tibble: 20,874 × 8
#> # Groups:   playerID [1,436]
#>   playerID  yearID teamID     G    AB     R     H    G_z
#>   <chr>      <int> <fct>  <int> <int> <int> <int>  <dbl>
#> 1 aaronha01   1954 ML1      122   468    58   131 -1.16 
#> 2 aaronha01   1955 ML1      153   602   105   189  0.519
#> 3 aaronha01   1956 ML1      153   609   106   200  0.519
#> 4 aaronha01   1957 ML1      151   615   118   198  0.411
#> # ℹ 20,870 more rows
dplyr/inst/doc/two-table.R0000644000176200001440000000512314525507107015156 0ustar liggesusers## ----echo = FALSE, message = FALSE-------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 5) library(dplyr) ## ----warning = FALSE---------------------------------------------------------- library(nycflights13) # Drop unimportant variables so it's easier to understand the join results. flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier) flights2 %>% left_join(airlines) ## ----------------------------------------------------------------------------- flights2 %>% left_join(weather) ## ----------------------------------------------------------------------------- flights2 %>% left_join(planes, by = "tailnum") ## ----------------------------------------------------------------------------- flights2 %>% left_join(airports, c("dest" = "faa")) flights2 %>% left_join(airports, c("origin" = "faa")) ## ----------------------------------------------------------------------------- df1 <- tibble(x = c(1, 2), y = 2:1) df2 <- tibble(x = c(3, 1), a = 10, b = "a") ## ----------------------------------------------------------------------------- df1 %>% inner_join(df2) %>% knitr::kable() ## ----------------------------------------------------------------------------- df1 %>% left_join(df2) ## ----------------------------------------------------------------------------- df1 %>% right_join(df2) df2 %>% left_join(df1) ## ----------------------------------------------------------------------------- df1 %>% full_join(df2) ## ----------------------------------------------------------------------------- df1 <- tibble(x = c(1, 1, 2), y = 1:3) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) df1 %>% left_join(df2) ## ----------------------------------------------------------------------------- library("nycflights13") flights %>% anti_join(planes, by = "tailnum") %>% count(tailnum, sort = TRUE) ## 
----------------------------------------------------------------------------- df1 <- tibble(x = c(1, 1, 3, 4), y = 1:4) df2 <- tibble(x = c(1, 1, 2), z = c("a", "b", "a")) # Four rows to start with: df1 %>% nrow() # And we get four rows after the join df1 %>% inner_join(df2, by = "x") %>% nrow() # But only two rows actually match df1 %>% semi_join(df2, by = "x") %>% nrow() ## ----------------------------------------------------------------------------- (df1 <- tibble(x = 1:2, y = c(1L, 1L))) (df2 <- tibble(x = 1:2, y = 1:2)) ## ----------------------------------------------------------------------------- intersect(df1, df2) # Note that we get 3 rows, not 4 union(df1, df2) setdiff(df1, df2) setdiff(df2, df1) dplyr/inst/doc/programming.html0000644000176200001440000014266214525507104016354 0ustar liggesusers Programming with dplyr

Programming with dplyr

Introduction

Most dplyr verbs use tidy evaluation in some way. Tidy evaluation is a special type of non-standard evaluation used throughout the tidyverse. There are two basic forms found in dplyr:

  • arrange(), count(), filter(), group_by(), mutate(), and summarise() use data masking so that you can use data variables as if they were variables in the environment (i.e. you write my_variable not df$my_variable).

  • across(), relocate(), rename(), select(), and pull() use tidy selection so you can easily choose variables based on their position, name, or type (e.g. starts_with("x") or is.numeric).

To determine whether a function argument uses data masking or tidy selection, look at the documentation: in the arguments list, you’ll see <data-masking> or <tidy-select>.

Data masking and tidy selection make interactive data exploration fast and fluid, but they add some new challenges when you attempt to use them indirectly such as in a for loop or a function. This vignette shows you how to overcome those challenges. We’ll first go over the basics of data masking and tidy selection, talk about how to use them indirectly, and then show you a number of recipes to solve common problems.

This vignette will give you the minimum knowledge you need to be an effective programmer with tidy evaluation. If you’d like to learn more about the underlying theory, or precisely how it’s different from non-standard evaluation, we recommend that you read the Metaprogramming chapters in Advanced R.

library(dplyr)

Data masking

Data masking makes data manipulation faster because it requires less typing. In most (but not all1) base R functions you need to refer to variables with $, leading to code that repeats the name of the data frame many times:

starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,]

The dplyr equivalent of this code is more concise because data masking means you only need to type starwars once:

starwars %>% filter(homeworld == "Naboo", species == "Human")

Data- and env-variables

The key idea behind data masking is that it blurs the line between the two different meanings of the word “variable”:

  • env-variables are “programming” variables that live in an environment. They are usually created with <-.

  • data-variables are “statistical” variables that live in a data frame. They usually come from data files (e.g. .csv, .xls), or are created manipulating existing variables.

To make those definitions a little more concrete, take this piece of code:

df <- data.frame(x = runif(3), y = runif(3))
df$x
#> [1] 0.08075014 0.83433304 0.60076089

It creates an env-variable, df, that contains two data-variables, x and y. Then it extracts the data-variable x out of the env-variable df using $.

I think this blurring of the meaning of “variable” is a really nice feature for interactive data analysis because it allows you to refer to data-vars as is, without any prefix. And this seems to be fairly intuitive since many newer R users will attempt to write diamonds[x == 0 | y == 0, ].

Unfortunately, this benefit does not come for free. When you start to program with these tools, you’re going to have to grapple with the distinction. This will be hard because you’ve never had to think about it before, so it’ll take a while for your brain to learn these new concepts and categories. However, once you’ve teased apart the idea of “variable” into data-variable and env-variable, I think you’ll find it fairly straightforward to use.

Indirection

The main challenge of programming with functions that use data masking arises when you introduce some indirection, i.e. when you want to get the data-variable from an env-variable instead of directly typing the data-variable’s name. There are two main cases:

  • When you have the data-variable in a function argument (i.e. an env-variable that holds a promise2), you need to embrace the argument by surrounding it in doubled braces, like filter(df, {{ var }}).

    The following function uses embracing to create a wrapper around summarise() that computes the minimum and maximum values of a variable, as well as the number of observations that were summarised:

    var_summary <- function(data, var) {
      data %>%
        summarise(n = n(), min = min({{ var }}), max = max({{ var }}))
    }
    mtcars %>% 
      group_by(cyl) %>% 
      var_summary(mpg)
  • When you have an env-variable that is a character vector, you need to index into the .data pronoun with [[, like summarise(df, mean = mean(.data[[var]])).

    The following example uses .data to count the number of unique values in each variable of mtcars:

    for (var in names(mtcars)) {
      mtcars %>% count(.data[[var]]) %>% print()
    }

    Note that .data is not a data frame; it’s a special construct, a pronoun, that allows you to access the current variables either directly, with .data$x or indirectly with .data[[var]]. Don’t expect other functions to work with it.

Name injection

Many data masking functions also use dynamic dots, which gives you another useful feature: generating names programmatically by using := instead of =. There are two basic forms, as illustrated below with tibble():

  • If you have the name in an env-variable, you can use glue syntax to interpolate in:

    name <- "susan"
    tibble("{name}" := 2)
    #> # A tibble: 1 × 1
    #>   susan
    #>   <dbl>
    #> 1     2
  • If the name should be derived from a data-variable in an argument, you can use embracing syntax:

    my_df <- function(x) {
      tibble("{{x}}_2" := x * 2)
    }
    my_var <- 10
    my_df(my_var)
    #> # A tibble: 1 × 1
    #>   my_var_2
    #>      <dbl>
    #> 1       20

Learn more in ?rlang::`dyn-dots`.

Tidy selection

Data masking makes it easy to compute on values within a dataset. Tidy selection is a complementary tool that makes it easy to work with the columns of a dataset.

The tidyselect DSL

Underneath all functions that use tidy selection is the tidyselect package. It provides a miniature domain specific language that makes it easy to select columns by name, position, or type. For example:

  • select(df, 1) selects the first column; select(df, last_col()) selects the last column.

  • select(df, c(a, b, c)) selects columns a, b, and c.

  • select(df, starts_with("a")) selects all columns whose name starts with “a”; select(df, ends_with("z")) selects all columns whose name ends with “z”.

  • select(df, where(is.numeric)) selects all numeric columns.

You can see more details in ?dplyr_tidy_select.

Indirection

As with data masking, tidy selection makes a common task easier at the cost of making a less common task harder. When you want to use tidy select indirectly with the column specification stored in an intermediate variable, you’ll need to learn some new tools. Again, there are two forms of indirection:

  • When you have the data-variable in an env-variable that is a function argument, you use the same technique as data masking: you embrace the argument by surrounding it in doubled braces.

    The following function summarises a data frame by computing the mean of all variables selected by the user:

    summarise_mean <- function(data, vars) {
      data %>% summarise(n = n(), across({{ vars }}, mean))
    }
    mtcars %>% 
      group_by(cyl) %>% 
      summarise_mean(where(is.numeric))
  • When you have an env-variable that is a character vector, you need to use all_of() or any_of() depending on whether you want the function to error if a variable is not found.

    The following code uses all_of() to select all of the variables found in a character vector; then ! plus all_of() to select all of the variables not found in a character vector:

    vars <- c("mpg", "vs")
    mtcars %>% select(all_of(vars))
    mtcars %>% select(!all_of(vars))

How-tos

The following examples solve a grab bag of common problems. We show you the minimum amount of code so that you can get the basic idea; most real problems will require more code or combining multiple techniques.

User-supplied data

If you check the documentation, you’ll see that .data never uses data masking or tidy select. That means you don’t need to do anything special in your function:

mutate_y <- function(data) {
  mutate(data, y = a + x)
}

One or more user-supplied expressions

If you want the user to supply an expression that’s passed onto an argument which uses data masking or tidy select, embrace the argument:

my_summarise <- function(data, group_var) {
  data %>%
    group_by({{ group_var }}) %>%
    summarise(mean = mean(mass))
}

This generalises in a straightforward way if you want to use one user-supplied expression in multiple places:

my_summarise2 <- function(data, expr) {
  data %>% summarise(
    mean = mean({{ expr }}),
    sum = sum({{ expr }}),
    n = n()
  )
}

If you want the user to provide multiple expressions, embrace each of them:

my_summarise3 <- function(data, mean_var, sd_var) {
  data %>% 
    summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }}))
}

If you want to use the name of a variable in the output, you can embrace the variable name on the left-hand side of := with {{:

my_summarise4 <- function(data, expr) {
  data %>% summarise(
    "mean_{{expr}}" := mean({{ expr }}),
    "sum_{{expr}}" := sum({{ expr }}),
    "n_{{expr}}" := n()
  )
}
my_summarise5 <- function(data, mean_var, sd_var) {
  data %>% 
    summarise(
      "mean_{{mean_var}}" := mean({{ mean_var }}), 
      "sd_{{sd_var}}" := sd({{ sd_var }})
    )
}

Any number of user-supplied expressions

If you want to take an arbitrary number of user supplied expressions, use .... This is most often useful when you want to give the user full control over a single part of the pipeline, like a group_by() or a mutate().

my_summarise <- function(.data, ...) {
  .data %>%
    group_by(...) %>%
    summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE))
}

starwars %>% my_summarise(homeworld)
#> # A tibble: 49 × 3
#>   homeworld    mass height
#>   <chr>       <dbl>  <dbl>
#> 1 Alderaan       64   176.
#> 2 Aleen Minor    15    79 
#> 3 Bespin         79   175 
#> 4 Bestine IV    110   180 
#> # ℹ 45 more rows
starwars %>% my_summarise(sex, gender)
#> `summarise()` has grouped output by 'sex'. You can override using the `.groups`
#> argument.
#> # A tibble: 6 × 4
#> # Groups:   sex [5]
#>   sex            gender      mass height
#>   <chr>          <chr>      <dbl>  <dbl>
#> 1 female         feminine    54.7   172.
#> 2 hermaphroditic masculine 1358     175 
#> 3 male           masculine   80.2   179.
#> 4 none           feminine   NaN      96 
#> # ℹ 2 more rows

When you use ... in this way, make sure that any other arguments start with . to reduce the chances of argument clashes; see https://design.tidyverse.org/dots-prefix.html for more details.

Creating multiple columns

Sometimes it can be useful for a single expression to return multiple columns. You can do this by returning an unnamed data frame:

quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) {
  tibble(
    val = quantile(x, probs),
    quant = probs
  )
}

x <- 1:5
quantile_df(x)
#> # A tibble: 3 × 2
#>     val quant
#>   <dbl> <dbl>
#> 1     2  0.25
#> 2     3  0.5 
#> 3     4  0.75

This sort of function is useful inside summarise() and mutate() which allow you to add multiple columns by returning a data frame:

df <- tibble(
  grp = rep(1:3, each = 10),
  x = runif(30),
  y = rnorm(30)
)

df %>%
  group_by(grp) %>%
  summarise(quantile_df(x, probs = .5))
#> # A tibble: 3 × 3
#>     grp   val quant
#>   <int> <dbl> <dbl>
#> 1     1 0.361   0.5
#> 2     2 0.541   0.5
#> 3     3 0.456   0.5

df %>%
  group_by(grp) %>%
  summarise(across(x:y, ~ quantile_df(.x, probs = .5), .unpack = TRUE))
#> # A tibble: 3 × 5
#>     grp x_val x_quant   y_val y_quant
#>   <int> <dbl>   <dbl>   <dbl>   <dbl>
#> 1     1 0.361     0.5  0.174      0.5
#> 2     2 0.541     0.5 -0.0110     0.5
#> 3     3 0.456     0.5  0.0583     0.5

Notice that we set .unpack = TRUE inside across(). This tells across() to unpack the data frame returned by quantile_df() into its respective columns, combining the column names of the original columns (x and y) with the column names returned from the function (val and quant).

If your function returns multiple rows per group, then you’ll need to switch from summarise() to reframe(). summarise() is restricted to returning 1 row summaries per group, but reframe() lifts this restriction:

df %>%
  group_by(grp) %>%
  reframe(across(x:y, quantile_df, .unpack = TRUE))
#> # A tibble: 9 × 5
#>     grp x_val x_quant  y_val y_quant
#>   <int> <dbl>   <dbl>  <dbl>   <dbl>
#> 1     1 0.219    0.25 -0.710    0.25
#> 2     1 0.361    0.5   0.174    0.5 
#> 3     1 0.674    0.75  0.524    0.75
#> 4     2 0.315    0.25 -0.690    0.25
#> # ℹ 5 more rows

Transforming user-supplied variables

If you want the user to provide a set of data-variables that are then transformed, use across() and pick():

my_summarise <- function(data, summary_vars) {
  data %>%
    summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE)))
}
starwars %>% 
  group_by(species) %>% 
  my_summarise(c(mass, height))
#> # A tibble: 38 × 3
#>   species   mass height
#>   <chr>    <dbl>  <dbl>
#> 1 Aleena      15     79
#> 2 Besalisk   102    198
#> 3 Cerean      82    198
#> 4 Chagrian   NaN    196
#> # ℹ 34 more rows

You can use this same idea for multiple sets of input data-variables:

my_summarise <- function(data, group_var, summarise_var) {
  data %>%
    group_by(pick({{ group_var }})) %>% 
    summarise(across({{ summarise_var }}, mean))
}

Use the .names argument to across() to control the names of the output.

my_summarise <- function(data, group_var, summarise_var) {
  data %>%
    group_by(pick({{ group_var }})) %>% 
    summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}"))
}

Loop over multiple variables

If you have a character vector of variable names, and want to operate on them with a for loop, index into the special .data pronoun:

for (var in names(mtcars)) {
  mtcars %>% count(.data[[var]]) %>% print()
}

This same technique works with for loop alternatives like the base R apply() family and the purrr map() family:

mtcars %>% 
  names() %>% 
  purrr::map(~ count(mtcars, .data[[.x]]))

(Note that the x in .data[[x]] is always treated as an env-variable; it will never come from the data.)

Use a variable from a Shiny input

Many Shiny input controls return character vectors, so you can use the same approach as above: .data[[input$var]].

library(shiny)
ui <- fluidPage(
  selectInput("var", "Variable", choices = names(diamonds)),
  tableOutput("output")
)
server <- function(input, output, session) {
  data <- reactive(filter(diamonds, .data[[input$var]] > 0))
  output$output <- renderTable(head(data()))
}

See https://mastering-shiny.org/action-tidy.html for more details and case studies.


  1. dplyr’s filter() is inspired by base R’s subset(). subset() provides data masking, but not with tidy evaluation, so the techniques described in this chapter don’t apply to it.↩︎

  2. In R, arguments are lazily evaluated which means that until you attempt to use them, they don’t hold a value, just a promise that describes how to compute the value. You can learn more at https://adv-r.hadley.nz/functions.html#lazy-evaluation↩︎

dplyr/inst/doc/dplyr.R0000644000176200001440000001227214525507102014410 0ustar liggesusers## ----echo = FALSE, message = FALSE-------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) ## ----------------------------------------------------------------------------- dim(starwars) starwars ## ----------------------------------------------------------------------------- starwars %>% filter(skin_color == "light", eye_color == "brown") ## ----eval = FALSE------------------------------------------------------------- # starwars[starwars$skin_color == "light" & starwars$eye_color == "brown", ] ## ----------------------------------------------------------------------------- starwars %>% arrange(height, mass) ## ----------------------------------------------------------------------------- starwars %>% arrange(desc(height)) ## ----------------------------------------------------------------------------- starwars %>% slice(5:10) ## ----------------------------------------------------------------------------- starwars %>% slice_head(n = 3) ## ----------------------------------------------------------------------------- starwars %>% slice_sample(n = 5) starwars %>% slice_sample(prop = 0.1) ## ----------------------------------------------------------------------------- starwars %>% filter(!is.na(height)) %>% slice_max(height, n = 3) ## ----------------------------------------------------------------------------- # Select columns by name starwars %>% select(hair_color, skin_color, eye_color) # Select all columns between hair_color and eye_color (inclusive) starwars %>% select(hair_color:eye_color) # Select all columns except those from hair_color to eye_color (inclusive) starwars %>% select(!(hair_color:eye_color)) # Select all columns ending with color starwars %>% select(ends_with("color")) ## 
----------------------------------------------------------------------------- starwars %>% select(home_world = homeworld) ## ----------------------------------------------------------------------------- starwars %>% rename(home_world = homeworld) ## ----------------------------------------------------------------------------- starwars %>% mutate(height_m = height / 100) ## ----------------------------------------------------------------------------- starwars %>% mutate(height_m = height / 100) %>% select(height_m, height, everything()) ## ----------------------------------------------------------------------------- starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2) ) %>% select(BMI, everything()) ## ----------------------------------------------------------------------------- starwars %>% mutate( height_m = height / 100, BMI = mass / (height_m^2), .keep = "none" ) ## ----------------------------------------------------------------------------- starwars %>% relocate(sex:homeworld, .before = height) ## ----------------------------------------------------------------------------- starwars %>% summarise(height = mean(height, na.rm = TRUE)) ## ----eval = FALSE------------------------------------------------------------- # a1 <- group_by(starwars, species, sex) # a2 <- select(a1, height, mass) # a3 <- summarise(a2, # height = mean(height, na.rm = TRUE), # mass = mean(mass, na.rm = TRUE) # ) ## ----------------------------------------------------------------------------- summarise( select( group_by(starwars, species, sex), height, mass ), height = mean(height, na.rm = TRUE), mass = mean(mass, na.rm = TRUE) ) ## ----eval = FALSE------------------------------------------------------------- # starwars %>% # group_by(species, sex) %>% # select(height, mass) %>% # summarise( # height = mean(height, na.rm = TRUE), # mass = mean(mass, na.rm = TRUE) # ) ## ----------------------------------------------------------------------------- # `name` represents the 
integer 1 select(starwars, name) select(starwars, 1) ## ----------------------------------------------------------------------------- height <- 5 select(starwars, height) ## ----------------------------------------------------------------------------- name <- "color" select(starwars, ends_with(name)) ## ----------------------------------------------------------------------------- name <- 5 select(starwars, name, identity(name)) ## ----------------------------------------------------------------------------- vars <- c("name", "height") select(starwars, all_of(vars), "mass") ## ----------------------------------------------------------------------------- df <- starwars %>% select(name, height, mass) ## ----------------------------------------------------------------------------- mutate(df, "height", 2) ## ----------------------------------------------------------------------------- mutate(df, height + 10) ## ----------------------------------------------------------------------------- var <- seq(1, nrow(df)) mutate(df, new = var) ## ----------------------------------------------------------------------------- group_by(starwars, sex) group_by(starwars, sex = as.factor(sex)) group_by(starwars, height_binned = cut(height, 3)) ## ----------------------------------------------------------------------------- group_by(df, "month") dplyr/inst/doc/in-packages.html0000644000176200001440000005732314525507103016212 0ustar liggesusers Using dplyr in packages

Using dplyr in packages

library(dplyr)

This vignette is aimed at package authors who use dplyr in their packages. We will discuss best practices learned over the years to avoid R CMD check notes and warnings, and how to handle when dplyr deprecates functions.

Join helpers

As of dplyr 1.1.0, we’ve introduced join_by() along with 4 helpers for performing various types of joins:

  • closest()

  • between()

  • within()

  • overlaps()

join_by() implements a domain specific language (DSL) for joins, and internally interprets calls to these functions.

You’ll notice that dplyr::closest() isn’t an exported function from dplyr (dplyr::between() and base::within() do happen to be preexisting functions). If you use closest() in your package, then this will cause an R CMD check note letting you know that you’ve used a symbol that doesn’t belong to any package.

To silence this, place utils::globalVariables("closest") in a source file in your package (but outside of any function). dbplyr does a similar thing for SQL functions, so you can see an example of that here.

You may also have to add utils to your package Imports, even though it is a base package. You can do that easily with usethis::use_package("utils").

Data masking and tidy selection NOTEs

If you’re writing a package and you have a function that uses data masking or tidy selection:

my_summary_function <- function(data) {
  data %>% 
    select(grp, x, y) %>% 
    filter(x > 0) %>% 
    group_by(grp) %>% 
    summarise(y = mean(y), n = n())
}

You’ll get a NOTE because R CMD check doesn’t know that dplyr functions use tidy evaluation:

N  checking R code for possible problems
   my_summary_function: no visible binding for global variable ‘grp’, ‘x’, ‘y’
   Undefined global functions or variables:
     grp x y

To eliminate this note:

  • For data masking, import .data from rlang and then use .data$var instead of var.
  • For tidy selection, use "var" instead of var.

That yields:

#' @importFrom rlang .data
my_summary_function <- function(data) {
  data %>% 
    select("grp", "x", "y") %>% 
    filter(.data$x > 0) %>% 
    group_by(.data$grp) %>% 
    summarise(y = mean(.data$y), n = n())
}

For more about programming with dplyr, see vignette("programming", package = "dplyr").

Deprecation

This section is focused on updating package code to deal with backwards incompatible changes in dplyr. We do try and minimize backward incompatible changes as much as possible, but sometimes they are necessary in order to radically simplify existing code, or unlock a lot of potential value in the future.

We will start with some general advice about supporting multiple versions of dplyr at once, and then we will discuss some specific changes in dplyr.

Multiple dplyr versions

Ideally, when we introduce a breaking change you’ll want to make sure that your package works with both the released version and the development version of dplyr. This is typically a little bit more work, but has two big advantages:

  • It’s more convenient for your users, since your package will work for them regardless of what version of dplyr they have installed.

  • It’s easier on CRAN since it doesn’t require a massive coordinated release of multiple packages.

If we break your package, we will typically send you a pull request that implements a patch before releasing the next version of dplyr. Most of the time, this patch will be backwards compatible with older versions of dplyr as well. Ideally, you’ll accept this patch and submit a new version of your package to CRAN before the new version of dplyr is released.

To make code work with multiple versions of a package, your first tool is the simple if statement:

if (utils::packageVersion("dplyr") > "0.5.0") {
  # code for new version
} else {
  # code for old version
}

Always condition on > current-version, not >= next-version because this will ensure that this branch is also used for the development version of the package. For example, if the current release is version "0.5.0", the development version will be "0.5.0.9000".

This typically works well if the branch for the “new version” introduces a new argument or has a slightly different return value.

This doesn’t work if we’ve introduced a new function that you need to switch to, like:

if (utils::packageVersion("dplyr") > "1.0.10") {
  dplyr::reframe(df, x = unique(x))
} else {
  dplyr::summarise(df, x = unique(x))
}

In this case, when checks are run with dplyr 1.0.10 you’ll get a warning about using a function from dplyr that doesn’t exist (reframe()) even though that branch will never run. You can get around this by using utils::getFromNamespace() to indirectly call the new dplyr function:

if (utils::packageVersion("dplyr") > "1.0.10") {
  utils::getFromNamespace("reframe", "dplyr")(df, x = unique(x))
} else {
  dplyr::summarise(df, x = unique(x))
}

As soon as the next version of dplyr is actually on CRAN (1.1.0 in this case), you should feel free to remove this code and unconditionally use reframe() as long as you also require dplyr (>= 1.1.0) in your DESCRIPTION file. This is typically not very painful for users, because they’d already be updating your package when they run into this requirement, so updating one more package along the way is generally easy. It also helps them get the latest bug fixes and features from dplyr.

Sometimes, it isn’t possible to avoid a call to @importFrom. For example you might be importing a generic so that you can define a method for it, but that generic has moved between packages. In this case, you can take advantage of a little-known feature in the NAMESPACE file: you can include raw if statements.

#' @rawNamespace
#' if (utils::packageVersion("dplyr") > "0.5.0") {
#'   importFrom("dbplyr", "build_sql")
#' } else {
#'   importFrom("dplyr", "build_sql")
#' }

Deprecation of mutate_*() and summarise_*()

The following mutate() and summarise() variants were deprecated in dplyr 0.7.0:

  • mutate_each(), summarise_each()

and the following variants were superseded in dplyr 1.0.0:

  • mutate_all(), summarise_all()

  • mutate_if(), summarise_if()

  • mutate_at(), summarise_at()

These have all been replaced by using mutate() or summarise() in combination with across(), which was introduced in dplyr 1.0.0.

If you used mutate_all() or mutate_each() without supplying a selection, you should update to use across(everything()):

starwars %>% mutate_each(funs(as.character))
starwars %>% mutate_all(funs(as.character))
starwars %>% mutate(across(everything(), as.character))

If you provided a selection through mutate_at() or mutate_each(), then you can switch to across() with a selection:

starwars %>% mutate_each(funs(as.character), height, mass)
starwars %>% mutate_at(vars(height, mass), as.character)
starwars %>% mutate(across(c(height, mass), as.character))

If you used predicates with mutate_if(), you can switch to using across() in combination with where():

starwars %>% mutate_if(is.factor, as.character)
starwars %>% mutate(across(where(is.factor), as.character))

Data frame subclasses

If you are a package author that is extending dplyr to work with a new data frame subclass, then we encourage you to read the documentation in ?dplyr_extending. This contains advice on how to implement the minimal number of extension generics possible to get maximal compatibility across dplyr’s verbs.

dplyr/inst/doc/in-packages.R0000644000176200001440000000460614525507103015443 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message = FALSE--------------------------------------------------- library(dplyr) ## ----------------------------------------------------------------------------- my_summary_function <- function(data) { data %>% select(grp, x, y) %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } ## ----------------------------------------------------------------------------- #' @importFrom rlang .data my_summary_function <- function(data) { data %>% select("grp", "x", "y") %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) } ## ----eval=FALSE--------------------------------------------------------------- # if (utils::packageVersion("dplyr") > "0.5.0") { # # code for new version # } else { # # code for old version # } ## ----eval=FALSE--------------------------------------------------------------- # if (utils::packageVersion("dplyr") > "1.0.10") { # dplyr::reframe(df, x = unique(x)) # } else { # dplyr::summarise(df, x = unique(x)) # } ## ----eval=FALSE--------------------------------------------------------------- # if (utils::packageVersion("dplyr") > "1.0.10") { # utils::getFromNamespace("reframe", "dplyr")(df, x = unique(x)) # } else { # dplyr::summarise(df, x = unique(x)) # } ## ----eval=FALSE--------------------------------------------------------------- # #' @rawNamespace # #' if (utils::packageVersion("dplyr") > "0.5.0") { # #' importFrom("dbplyr", "build_sql") # #' } else { # #' importFrom("dplyr", "build_sql") # #' } ## ----eval=FALSE--------------------------------------------------------------- # starwars %>% mutate_each(funs(as.character)) # starwars %>% mutate_all(funs(as.character)) # starwars %>% mutate(across(everything(), as.character)) ## ----eval = 
FALSE------------------------------------------------------------- # starwars %>% mutate_each(funs(as.character), height, mass) # starwars %>% mutate_at(vars(height, mass), as.character) # starwars %>% mutate(across(c(height, mass), as.character)) ## ----eval=FALSE--------------------------------------------------------------- # starwars %>% mutate_if(is.factor, as.character) # starwars %>% mutate(across(where(is.factor), as.character)) dplyr/inst/doc/window-functions.R0000644000176200001440000000705214525507107016600 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) library(dplyr) set.seed(1014) if (!rlang::is_installed("Lahman")) { knitr::opts_chunk$set(eval = FALSE) } ## ----------------------------------------------------------------------------- library(Lahman) batting <- Lahman::Batting %>% as_tibble() %>% select(playerID, yearID, teamID, G, AB:H) %>% arrange(playerID, yearID, teamID) %>% semi_join(Lahman::AwardsPlayers, by = "playerID") players <- batting %>% group_by(playerID) ## ----eval = FALSE------------------------------------------------------------- # # For each player, find the two years with most hits # filter(players, min_rank(desc(H)) <= 2 & H > 0) # # Within each player, rank each year by the number of games played # mutate(players, G_rank = min_rank(G)) # # # For each player, find every year that was better than the previous year # filter(players, G > lag(G)) # # For each player, compute avg change in games played per year # mutate(players, G_change = (G - lag(G)) / (yearID - lag(yearID))) # # # For each player, find all years where they played more games than they did on average # filter(players, G > mean(G)) # # For each, player compute a z score based on number of games played # mutate(players, G_z = (G - mean(G)) / sd(G)) ## 
----------------------------------------------------------------------------- x <- c(1, 1, 2, 2, 2) row_number(x) min_rank(x) dense_rank(x) ## ----------------------------------------------------------------------------- cume_dist(x) percent_rank(x) ## ----------------------------------------------------------------------------- filter(players, cume_dist(desc(G)) < 0.1) ## ----------------------------------------------------------------------------- by_team_player <- group_by(batting, teamID, playerID) by_team <- summarise(by_team_player, G = sum(G)) by_team_quartile <- group_by(by_team, quartile = ntile(G, 4)) summarise(by_team_quartile, mean(G)) ## ----------------------------------------------------------------------------- x <- 1:5 lead(x) lag(x) ## ----results = "hide"--------------------------------------------------------- # Compute the relative change in games played mutate(players, G_delta = G - lag(G)) ## ----results = "hide"--------------------------------------------------------- # Find when a player changed teams filter(players, teamID != lag(teamID)) ## ----------------------------------------------------------------------------- df <- data.frame(year = 2000:2005, value = (0:5) ^ 2) scrambled <- df[sample(nrow(df)), ] wrong <- mutate(scrambled, prev_value = lag(value)) arrange(wrong, year) right <- mutate(scrambled, prev_value = lag(value, order_by = year)) arrange(right, year) ## ----eval = FALSE------------------------------------------------------------- # filter(players, cumany(G > 150)) ## ----------------------------------------------------------------------------- x <- 1:10 y <- 10:1 order_by(y, cumsum(x)) ## ----eval = FALSE------------------------------------------------------------- # filter(players, G > mean(G)) # filter(players, G < median(G)) ## ----eval = FALSE------------------------------------------------------------- # filter(players, ntile(G, 2) == 2) ## ----------------------------------------------------------------------------- 
mutate(players, career_year = yearID - min(yearID) + 1) ## ----------------------------------------------------------------------------- mutate(players, G_z = (G - mean(G)) / sd(G)) dplyr/inst/doc/rowwise.R0000644000176200001440000001462314525507105014762 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----setup-------------------------------------------------------------------- library(dplyr, warn.conflicts = FALSE) ## ----include = FALSE---------------------------------------------------------- nest_by <- function(df, ...) { df %>% group_by(...) %>% summarise(data = list(pick(everything()))) %>% rowwise(...) } # mtcars %>% nest_by(cyl) ## ----------------------------------------------------------------------------- df <- tibble(x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() ## ----------------------------------------------------------------------------- df %>% mutate(m = mean(c(x, y, z))) df %>% rowwise() %>% mutate(m = mean(c(x, y, z))) ## ----------------------------------------------------------------------------- df <- tibble(name = c("Mara", "Hadley"), x = 1:2, y = 3:4, z = 5:6) df %>% rowwise() %>% summarise(m = mean(c(x, y, z))) df %>% rowwise(name) %>% summarise(m = mean(c(x, y, z))) ## ----------------------------------------------------------------------------- df <- tibble(id = 1:6, w = 10:15, x = 20:25, y = 30:35, z = 40:45) df ## ----------------------------------------------------------------------------- rf <- df %>% rowwise(id) ## ----------------------------------------------------------------------------- rf %>% mutate(total = sum(c(w, x, y, z))) rf %>% summarise(total = sum(c(w, x, y, z))) ## ----------------------------------------------------------------------------- rf %>% mutate(total = sum(c_across(w:z))) rf %>% mutate(total = sum(c_across(where(is.numeric)))) ## 
----------------------------------------------------------------------------- rf %>% mutate(total = sum(c_across(w:z))) %>% ungroup() %>% mutate(across(w:z, ~ . / total)) ## ----------------------------------------------------------------------------- df %>% mutate(total = rowSums(pick(where(is.numeric), -id))) df %>% mutate(mean = rowMeans(pick(where(is.numeric), -id))) ## ----eval = FALSE, include = FALSE-------------------------------------------- # bench::mark( # df %>% mutate(m = rowSums(pick(x:z))), # df %>% mutate(m = apply(pick(x:z), 1, sum)), # df %>% rowwise() %>% mutate(m = sum(pick(x:z))), # check = FALSE # ) ## ----------------------------------------------------------------------------- df <- tibble( x = list(1, 2:3, 4:6) ) ## ----------------------------------------------------------------------------- df %>% mutate(l = length(x)) ## ----------------------------------------------------------------------------- df %>% mutate(l = lengths(x)) ## ----------------------------------------------------------------------------- df %>% mutate(l = sapply(x, length)) df %>% mutate(l = purrr::map_int(x, length)) ## ----------------------------------------------------------------------------- df %>% rowwise() %>% mutate(l = length(x)) ## ----------------------------------------------------------------------------- df <- tibble(g = 1:2, y = list(1:3, "a")) gf <- df %>% group_by(g) rf <- df %>% rowwise(g) ## ----------------------------------------------------------------------------- gf %>% mutate(type = typeof(y), length = length(y)) rf %>% mutate(type = typeof(y), length = length(y)) ## ----------------------------------------------------------------------------- # grouped out1 <- integer(2) for (i in 1:2) { out1[[i]] <- length(df$y[i]) } out1 # rowwise out2 <- integer(2) for (i in 1:2) { out2[[i]] <- length(df$y[[i]]) } out2 ## ----error = TRUE------------------------------------------------------------- gf %>% mutate(y2 = y) rf %>% mutate(y2 = y) rf %>% 
mutate(y2 = list(y)) ## ----------------------------------------------------------------------------- by_cyl <- mtcars %>% nest_by(cyl) by_cyl ## ----------------------------------------------------------------------------- mods <- by_cyl %>% mutate(mod = list(lm(mpg ~ wt, data = data))) mods ## ----------------------------------------------------------------------------- mods <- mods %>% mutate(pred = list(predict(mod, data))) mods ## ----------------------------------------------------------------------------- mods %>% summarise(rmse = sqrt(mean((pred - data$mpg) ^ 2))) mods %>% summarise(rsq = summary(mod)$r.squared) mods %>% summarise(broom::glance(mod)) ## ----------------------------------------------------------------------------- mods %>% reframe(broom::tidy(mod)) ## ----------------------------------------------------------------------------- df <- tribble( ~ n, ~ min, ~ max, 1, 0, 1, 2, 10, 100, 3, 100, 1000, ) ## ----------------------------------------------------------------------------- df %>% rowwise() %>% mutate(data = list(runif(n, min, max))) ## ----error = TRUE------------------------------------------------------------- df %>% rowwise() %>% mutate(data = runif(n, min, max)) ## ----------------------------------------------------------------------------- df <- expand.grid(mean = c(-1, 0, 1), sd = c(1, 10, 100)) df %>% rowwise() %>% mutate(data = list(rnorm(10, mean, sd))) ## ----------------------------------------------------------------------------- df <- tribble( ~rng, ~params, "runif", list(n = 10), "rnorm", list(n = 20), "rpois", list(n = 10, lambda = 5), ) %>% rowwise() df %>% mutate(data = list(do.call(rng, params))) ## ----include = FALSE, eval = FALSE-------------------------------------------- # df <- rowwise(tribble( # ~rng, ~params, # "runif", list(min = -1, max = 1), # "rnorm", list(), # "rpois", list(lambda = 5), # )) # # # Has to happen in separate function to avoid eager unquoting # f <- function(rng, params) purrr::exec(rng, n = 
10, !!!params) # df %>% # mutate(data = list(f(rng, params))) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% do(head(., 1)) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% reframe(head(pick(everything()), 1)) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% do(nrows = nrow(.)) ## ----------------------------------------------------------------------------- mtcars %>% group_by(cyl) %>% summarise(nrows = nrow(pick(everything()))) dplyr/inst/doc/programming.Rmd0000644000176200001440000003536214406402754016133 0ustar liggesusers--- title: "Programming with dplyr" description: > Most dplyr verbs use "tidy evaluation", a special type of non-standard evaluation. In this vignette, you'll learn the two basic forms, data masking and tidy selection, and how you can program with them using either functions or for loops. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Programming with dplyr} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` ## Introduction Most dplyr verbs use **tidy evaluation** in some way. Tidy evaluation is a special type of non-standard evaluation used throughout the tidyverse. There are two basic forms found in dplyr: - `arrange()`, `count()`, `filter()`, `group_by()`, `mutate()`, and `summarise()` use **data masking** so that you can use data variables as if they were variables in the environment (i.e. you write `my_variable` not `df$my_variable`). - `across()`, `relocate()`, `rename()`, `select()`, and `pull()` use **tidy selection** so you can easily choose variables based on their position, name, or type (e.g. `starts_with("x")` or `is.numeric`). 
To determine whether a function argument uses data masking or tidy selection, look at the documentation: in the arguments list, you'll see `` or ``. Data masking and tidy selection make interactive data exploration fast and fluid, but they add some new challenges when you attempt to use them indirectly such as in a for loop or a function. This vignette shows you how to overcome those challenges. We'll first go over the basics of data masking and tidy selection, talk about how to use them indirectly, and then show you a number of recipes to solve common problems. This vignette will give you the minimum knowledge you need to be an effective programmer with tidy evaluation. If you'd like to learn more about the underlying theory, or precisely how it's different from non-standard evaluation, we recommend that you read the Metaprogramming chapters in [*Advanced R*](https://adv-r.hadley.nz). ```{r setup, message = FALSE} library(dplyr) ``` ## Data masking Data masking makes data manipulation faster because it requires less typing. In most (but not all[^1]) base R functions you need to refer to variables with `$`, leading to code that repeats the name of the data frame many times: [^1]: dplyr's `filter()` is inspired by base R's `subset()`. `subset()` provides data masking, but not with tidy evaluation, so the techniques described in this chapter don't apply to it. ```{r, results = FALSE} starwars[starwars$homeworld == "Naboo" & starwars$species == "Human", ,] ``` The dplyr equivalent of this code is more concise because data masking allows you to need to type `starwars` once: ```{r, results = FALSE} starwars %>% filter(homeworld == "Naboo", species == "Human") ``` ### Data- and env-variables The key idea behind data masking is that it blurs the line between the two different meanings of the word "variable": - **env-variables** are "programming" variables that live in an environment. They are usually created with `<-`. 
- **data-variables** are "statistical" variables that live in a data frame. They usually come from data files (e.g. `.csv`, `.xls`), or are created manipulating existing variables. To make those definitions a little more concrete, take this piece of code: ```{r} df <- data.frame(x = runif(3), y = runif(3)) df$x ``` It creates a env-variable, `df`, that contains two data-variables, `x` and `y`. Then it extracts the data-variable `x` out of the env-variable `df` using `$`. I think this blurring of the meaning of "variable" is a really nice feature for interactive data analysis because it allows you to refer to data-vars as is, without any prefix. And this seems to be fairly intuitive since many newer R users will attempt to write `diamonds[x == 0 | y == 0, ]`. Unfortunately, this benefit does not come for free. When you start to program with these tools, you're going to have to grapple with the distinction. This will be hard because you've never had to think about it before, so it'll take a while for your brain to learn these new concepts and categories. However, once you've teased apart the idea of "variable" into data-variable and env-variable, I think you'll find it fairly straightforward to use. ### Indirection The main challenge of programming with functions that use data masking arises when you introduce some indirection, i.e. when you want to get the data-variable from an env-variable instead of directly typing the data-variable's name. There are two main cases: - When you have the data-variable in a function argument (i.e. an env-variable that holds a promise[^2]), you need to **embrace** the argument by surrounding it in doubled braces, like `filter(df, {{ var }})`. 
The following function uses embracing to create a wrapper around `summarise()` that computes the minimum and maximum values of a variable, as well as the number of observations that were summarised: ```{r, results = FALSE} var_summary <- function(data, var) { data %>% summarise(n = n(), min = min({{ var }}), max = max({{ var }})) } mtcars %>% group_by(cyl) %>% var_summary(mpg) ``` - When you have an env-variable that is a character vector, you need to index into the `.data` pronoun with `[[`, like `summarise(df, mean = mean(.data[[var]]))`. The following example uses `.data` to count the number of unique values in each variable of `mtcars`: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` Note that `.data` is not a data frame; it's a special construct, a pronoun, that allows you to access the current variables either directly, with `.data$x` or indirectly with `.data[[var]]`. Don't expect other functions to work with it. [^2]: In R, arguments are lazily evaluated which means that until you attempt to use, they don't hold a value, just a **promise** that describes how to compute the value. You can learn more at ### Name injection Many data masking functions also use dynamic dots, which gives you another useful feature: generating names programmatically by using `:=` instead of `=`. There are two basics forms, as illustrated below with `tibble()`: - If you have the name in an env-variable, you can use glue syntax to interpolate in: ```{r} name <- "susan" tibble("{name}" := 2) ``` - If the name should be derived from a data-variable in an argument, you can use embracing syntax: ```{r} my_df <- function(x) { tibble("{{x}}_2" := x * 2) } my_var <- 10 my_df(my_var) ``` Learn more in `` ?rlang::`dyn-dots` ``. ## Tidy selection Data masking makes it easy to compute on values within a dataset. Tidy selection is a complementary tool that makes it easy to work with the columns of a dataset. 
### The tidyselect DSL Underneath all functions that use tidy selection is the [tidyselect](https://tidyselect.r-lib.org/) package. It provides a miniature domain specific language that makes it easy to select columns by name, position, or type. For example: - `select(df, 1)` selects the first column; `select(df, last_col())` selects the last column. - `select(df, c(a, b, c))` selects columns `a`, `b`, and `c`. - `select(df, starts_with("a"))` selects all columns whose name starts with "a"; `select(df, ends_with("z"))` selects all columns whose name ends with "z". - `select(df, where(is.numeric))` selects all numeric columns. You can see more details in `?dplyr_tidy_select`. ### Indirection As with data masking, tidy selection makes a common task easier at the cost of making a less common task harder. When you want to use tidy select indirectly with the column specification stored in an intermediate variable, you'll need to learn some new tools. Again, there are two forms of indirection: - When you have the data-variable in an env-variable that is a function argument, you use the same technique as data masking: you **embrace** the argument by surrounding it in doubled braces. The following function summarises a data frame by computing the mean of all variables selected by the user: ```{r, results = FALSE} summarise_mean <- function(data, vars) { data %>% summarise(n = n(), across({{ vars }}, mean)) } mtcars %>% group_by(cyl) %>% summarise_mean(where(is.numeric)) ``` - When you have an env-variable that is a character vector, you need to use `all_of()` or `any_of()` depending on whether you want the function to error if a variable is not found. 
The following code uses `all_of()` to select all of the variables found in a character vector; then `!` plus `all_of()` to select all of the variables *not* found in a character vector: ```{r, results = FALSE} vars <- c("mpg", "vs") mtcars %>% select(all_of(vars)) mtcars %>% select(!all_of(vars)) ``` ## How-tos The following examples solve a grab bag of common problems. We show you the minimum amount of code so that you can get the basic idea; most real problems will require more code or combining multiple techniques. ### User-supplied data If you check the documentation, you'll see that `.data` never uses data masking or tidy select. That means you don't need to do anything special in your function: ```{r} mutate_y <- function(data) { mutate(data, y = a + x) } ``` ### One or more user-supplied expressions If you want the user to supply an expression that's passed onto an argument which uses data masking or tidy select, embrace the argument: ```{r} my_summarise <- function(data, group_var) { data %>% group_by({{ group_var }}) %>% summarise(mean = mean(mass)) } ``` This generalises in a straightforward way if you want to use one user-supplied expression in multiple places: ```{r} my_summarise2 <- function(data, expr) { data %>% summarise( mean = mean({{ expr }}), sum = sum({{ expr }}), n = n() ) } ``` If you want the user to provide multiple expressions, embrace each of them: ```{r} my_summarise3 <- function(data, mean_var, sd_var) { data %>% summarise(mean = mean({{ mean_var }}), sd = sd({{ sd_var }})) } ``` If you want to use the name of a variable in the output, you can embrace the variable name on the left-hand side of `:=` with `{{`: ```{r} my_summarise4 <- function(data, expr) { data %>% summarise( "mean_{{expr}}" := mean({{ expr }}), "sum_{{expr}}" := sum({{ expr }}), "n_{{expr}}" := n() ) } my_summarise5 <- function(data, mean_var, sd_var) { data %>% summarise( "mean_{{mean_var}}" := mean({{ mean_var }}), "sd_{{sd_var}}" := sd({{ sd_var }}) ) } ``` ### Any 
number of user-supplied expressions If you want to take an arbitrary number of user supplied expressions, use `...`. This is most often useful when you want to give the user full control over a single part of the pipeline, like a `group_by()` or a `mutate()`. ```{r} my_summarise <- function(.data, ...) { .data %>% group_by(...) %>% summarise(mass = mean(mass, na.rm = TRUE), height = mean(height, na.rm = TRUE)) } starwars %>% my_summarise(homeworld) starwars %>% my_summarise(sex, gender) ``` When you use `...` in this way, make sure that any other arguments start with `.` to reduce the chances of argument clashes; see for more details. ### Creating multiple columns Sometimes it can be useful for a single expression to return multiple columns. You can do this by returning an unnamed data frame: ```{r} quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) { tibble( val = quantile(x, probs), quant = probs ) } x <- 1:5 quantile_df(x) ``` This sort of function is useful inside `summarise()` and `mutate()` which allow you to add multiple columns by returning a data frame: ```{r} df <- tibble( grp = rep(1:3, each = 10), x = runif(30), y = rnorm(30) ) df %>% group_by(grp) %>% summarise(quantile_df(x, probs = .5)) df %>% group_by(grp) %>% summarise(across(x:y, ~ quantile_df(.x, probs = .5), .unpack = TRUE)) ``` Notice that we set `.unpack = TRUE` inside `across()`. This tells `across()` to _unpack_ the data frame returned by `quantile_df()` into its respective columns, combining the column names of the original columns (`x` and `y`) with the column names returned from the function (`val` and `quant`). If your function returns multiple _rows_ per group, then you'll need to switch from `summarise()` to `reframe()`. 
`summarise()` is restricted to returning 1 row summaries per group, but `reframe()` lifts this restriction: ```{r} df %>% group_by(grp) %>% reframe(across(x:y, quantile_df, .unpack = TRUE)) ``` ### Transforming user-supplied variables If you want the user to provide a set of data-variables that are then transformed, use `across()` and `pick()`: ```{r} my_summarise <- function(data, summary_vars) { data %>% summarise(across({{ summary_vars }}, ~ mean(., na.rm = TRUE))) } starwars %>% group_by(species) %>% my_summarise(c(mass, height)) ``` You can use this same idea for multiple sets of input data-variables: ```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean)) } ``` Use the `.names` argument to `across()` to control the names of the output. ```{r} my_summarise <- function(data, group_var, summarise_var) { data %>% group_by(pick({{ group_var }})) %>% summarise(across({{ summarise_var }}, mean, .names = "mean_{.col}")) } ``` ### Loop over multiple variables If you have a character vector of variable names, and want to operate on them with a for loop, index into the special `.data` pronoun: ```{r, results = FALSE} for (var in names(mtcars)) { mtcars %>% count(.data[[var]]) %>% print() } ``` This same technique works with for loop alternatives like the base R `apply()` family and the purrr `map()` family: ```{r, results = FALSE} mtcars %>% names() %>% purrr::map(~ count(mtcars, .data[[.x]])) ``` (Note that the `x` in `.data[[x]]` is always treated as an env-variable; it will never come from the data.) ### Use a variable from an Shiny input Many Shiny input controls return character vectors, so you can use the same approach as above: `.data[[input$var]]`. 
```{r, eval = FALSE} library(shiny) ui <- fluidPage( selectInput("var", "Variable", choices = names(diamonds)), tableOutput("output") ) server <- function(input, output, session) { data <- reactive(filter(diamonds, .data[[input$var]] > 0)) output$output <- renderTable(head(data())) } ``` See the Mastering Shiny chapter on tidy evaluation for more details and case studies. dplyr/inst/doc/in-packages.Rmd0000644000176200001440000002006414406402754015764 0ustar liggesusers--- title: "Using dplyr in packages" description: > A guide for package authors who use dplyr. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Using dplyr in packages} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup, message = FALSE} library(dplyr) ``` This vignette is aimed at package authors who use dplyr in their packages. We will discuss best practices learned over the years to avoid `R CMD check` notes and warnings, and how to handle when dplyr deprecates functions. ## Join helpers As of dplyr 1.1.0, we've introduced `join_by()` along with 4 helpers for performing various types of joins: - `closest()` - `between()` - `within()` - `overlaps()` `join_by()` implements a domain specific language (DSL) for joins, and internally interprets calls to these functions. You'll notice that `dplyr::closest()` isn't an exported function from dplyr (`dplyr::between()` and `base::within()` do happen to be preexisting functions). If you use `closest()` in your package, then this will cause an `R CMD check` note letting you know that you've used a symbol that doesn't belong to any package. To silence this, place `utils::globalVariables("closest")` in a source file in your package (but outside of any function). dbplyr does a similar thing for SQL functions, so you can see an example of that [here](https://github.com/tidyverse/dbplyr/blob/7edf5d607fd6b0b897721ea96d1c9ca9401f0f9b/R/backend-redshift.R#L144). 
You may also have to add utils to your package Imports, even though it is a base package. You can do that easily with `usethis::use_package("utils")`. ## Data masking and tidy selection NOTEs If you're writing a package and you have a function that uses data masking or tidy selection: ```{r} my_summary_function <- function(data) { data %>% select(grp, x, y) %>% filter(x > 0) %>% group_by(grp) %>% summarise(y = mean(y), n = n()) } ``` You'll get a `NOTE` because `R CMD check` doesn't know that dplyr functions use tidy evaluation: N checking R code for possible problems my_summary_function: no visible binding for global variable ‘grp’, ‘x’, ‘y’ Undefined global functions or variables: grp x y To eliminate this note: - For data masking, import `.data` from [rlang](https://rlang.r-lib.org/) and then use `.data$var` instead of `var`. - For tidy selection, use `"var"` instead of `var`. That yields: ```{r} #' @importFrom rlang .data my_summary_function <- function(data) { data %>% select("grp", "x", "y") %>% filter(.data$x > 0) %>% group_by(.data$grp) %>% summarise(y = mean(.data$y), n = n()) } ``` For more about programming with dplyr, see `vignette("programming", package = "dplyr")`. ## Deprecation This section is focused on updating package code to deal with backwards incompatible changes in dplyr. We do try and minimize backward incompatible changes as much as possible, but sometimes they are necessary in order to radically simplify existing code, or unlock a lot of potential value in the future. We will start with some general advice about supporting multiple versions of dplyr at once, and then we will discuss some specific changes in dplyr. ### Multiple dplyr versions Ideally, when we introduce a breaking change you'll want to make sure that your package works with both the released version and the development version of dplyr. 
This is typically a little bit more work, but has two big advantages: - It's more convenient for your users, since your package will work for them regardless of what version of dplyr they have installed. - It's easier on CRAN since it doesn't require a massive coordinated release of multiple packages. If we break your package, we will typically send you a pull request that implements a patch before releasing the next version of dplyr. Most of the time, this patch will be backwards compatible with older versions of dplyr as well. Ideally, you'll accept this patch and submit a new version of your package to CRAN before the new version of dplyr is released. To make code work with multiple versions of a package, your first tool is the simple if statement: ```{r, eval=FALSE} if (utils::packageVersion("dplyr") > "0.5.0") { # code for new version } else { # code for old version } ``` Always condition on `> current-version`, not `>= next-version` because this will ensure that this branch is also used for the development version of the package. For example, if the current release is version `"0.5.0"`, the development version will be `"0.5.0.9000"`. This typically works well if the branch for the "new version" introduces a new argument or has a slightly different return value. This *doesn't* work if we've introduced a new function that you need to switch to, like: ```{r, eval=FALSE} if (utils::packageVersion("dplyr") > "1.0.10") { dplyr::reframe(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } ``` In this case, when checks are run with dplyr 1.0.10 you'll get a warning about using a function from dplyr that doesn't exist (`reframe()`) even though that branch will never run. 
You can get around this by using `utils::getFromNamespace()` to indirectly call the new dplyr function: ```{r, eval=FALSE} if (utils::packageVersion("dplyr") > "1.0.10") { utils::getFromNamespace("reframe", "dplyr")(df, x = unique(x)) } else { dplyr::summarise(df, x = unique(x)) } ``` As soon as the next version of dplyr is actually on CRAN (1.1.0 in this case), you should feel free to remove this code and unconditionally use `reframe()` as long as you also require `dplyr (>= 1.1.0)` in your `DESCRIPTION` file. This is typically not very painful for users, because they'd already be updating your package when they run into this requirement, so updating one more package along the way is generally easy. It also helps them get the latest bug fixes and features from dplyr. Sometimes, it isn't possible to avoid a call to `@importFrom`. For example you might be importing a generic so that you can define a method for it, but that generic has moved between packages. In this case, you can take advantage of a little-known feature in the `NAMESPACE` file: you can include raw `if` statements. ```{r, eval=FALSE} #' @rawNamespace #' if (utils::packageVersion("dplyr") > "0.5.0") { #' importFrom("dbplyr", "build_sql") #' } else { #' importFrom("dplyr", "build_sql") #' } ``` ### Deprecation of `mutate_*()` and `summarise_*()` The following `mutate()` and `summarise()` variants were deprecated in dplyr 0.7.0: - `mutate_each()`, `summarise_each()` and the following variants were superseded in dplyr 1.0.0: - `mutate_all()`, `summarise_all()` - `mutate_if()`, `summarise_if()` - `mutate_at()`, `summarise_at()` These have all been replaced by using `mutate()` or `summarise()` in combination with `across()`, which was introduced in dplyr 1.0.0. 
If you used `mutate_all()` or `mutate_each()` without supplying a selection, you should update to use `across(everything())`: ```{r, eval=FALSE} starwars %>% mutate_each(funs(as.character)) starwars %>% mutate_all(funs(as.character)) starwars %>% mutate(across(everything(), as.character)) ``` If you provided a selection through `mutate_at()` or `mutate_each()`, then you can switch to `across()` with a selection: ```{r, eval = FALSE} starwars %>% mutate_each(funs(as.character), height, mass) starwars %>% mutate_at(vars(height, mass), as.character) starwars %>% mutate(across(c(height, mass), as.character)) ``` If you used predicates with `mutate_if()`, you can switch to using `across()` in combination with `where()`: ```{r, eval=FALSE} starwars %>% mutate_if(is.factor, as.character) starwars %>% mutate(across(where(is.factor), as.character)) ``` ## Data frame subclasses If you are a package author that is *extending* dplyr to work with a new data frame subclass, then we encourage you to read the documentation in `?dplyr_extending`. This contains advice on how to implement the minimal number of extension generics possible to get maximal compatibility across dplyr's verbs. dplyr/inst/doc/grouping.Rmd0000644000176200001440000001637514366556340015454 0ustar liggesusers--- title: "Grouped data" description: > To unlock the full potential of dplyr, you need to understand how each verb interacts with grouping. This vignette shows you how to manipulate grouping, how each verb changes its behaviour when working with grouped data, and how you can access data about the "current" group from within a verb. 
output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Grouped data} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r, echo = FALSE, message = FALSE, warning = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ``` dplyr verbs are particularly powerful when you apply them to grouped data frames (`grouped_df` objects). This vignette shows you: * How to group, inspect, and ungroup with `group_by()` and friends. * How individual dplyr verbs change their behaviour when applied to a grouped data frame. * How to access data about the "current" group from within a verb. We'll start by loading dplyr: ```{r, message = FALSE} library(dplyr) ``` ## `group_by()` The most important grouping verb is `group_by()`: it takes a data frame and one or more variables to group by: ```{r} by_species <- starwars %>% group_by(species) by_sex_gender <- starwars %>% group_by(sex, gender) ``` You can see the grouping when you print the data: ```{r} by_species by_sex_gender ``` Or use `tally()` to count the number of rows in each group. The `sort` argument is useful if you want to see the largest groups up front. ```{r} by_species %>% tally() by_sex_gender %>% tally(sort = TRUE) ``` As well as grouping by existing variables, you can group by any function of existing variables. This is equivalent to performing a `mutate()` **before** the `group_by()`: ```{r group_by_with_expression} bmi_breaks <- c(0, 18.5, 25, 30, Inf) starwars %>% group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>% tally() ``` ## Group metadata You can see underlying group data with `group_keys()`. 
It has one row for each group and one column for each grouping variable: ```{r group_vars} by_species %>% group_keys() by_sex_gender %>% group_keys() ``` You can see which group each row belongs to with `group_indices()`: ```{r} by_species %>% group_indices() ``` And which rows each group contains with `group_rows()`: ```{r} by_species %>% group_rows() %>% head() ``` Use `group_vars()` if you just want the names of the grouping variables: ```{r} by_species %>% group_vars() by_sex_gender %>% group_vars() ``` ### Changing and adding to grouping variables If you apply `group_by()` to an already grouped dataset, it will overwrite the existing grouping variables. For example, the following code groups by `homeworld` instead of `species`: ```{r} by_species %>% group_by(homeworld) %>% tally() ``` To **augment** the grouping, use `.add = TRUE`[^add]. For example, the following code groups by species and homeworld: ```{r} by_species %>% group_by(homeworld, .add = TRUE) %>% tally() ``` [^add]: Note that the argument changed from `add = TRUE` to `.add = TRUE` in dplyr 1.0.0. ### Removing grouping variables To remove all grouping variables, use `ungroup()`: ```{r} by_species %>% ungroup() %>% tally() ``` You can also choose to selectively ungroup by listing the variables you want to remove: ```{r} by_sex_gender %>% ungroup(sex) %>% tally() ``` ## Verbs The following sections describe how grouping affects the main dplyr verbs. ### `summarise()` `summarise()` computes a summary for each group. This means that it starts from `group_keys()`, adding summary variables to the right hand side: ```{r summarise} by_species %>% summarise( n = n(), height = mean(height, na.rm = TRUE) ) ``` The `.groups=` argument controls the grouping structure of the output. The historical behaviour of removing the right hand side grouping variable corresponds to `.groups = "drop_last"` without a message or `.groups = NULL` with a message (the default). 
```{r} by_sex_gender %>% summarise(n = n()) %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop_last") %>% group_vars() ``` Since version 1.0.0 the groups may also be kept (`.groups = "keep"`) or dropped (`.groups = "drop"`). ```{r} by_sex_gender %>% summarise(n = n(), .groups = "keep") %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop") %>% group_vars() ``` When the output no longer has grouping variables, it becomes ungrouped (i.e. a regular tibble). ### `select()`, `rename()`, and `relocate()` `rename()` and `relocate()` behave identically with grouped and ungrouped data because they only affect the name or position of existing columns. Grouped `select()` is almost identical to ungrouped select, except that it always includes the grouping variables: ```{r select} by_species %>% select(mass) ``` If you don't want the grouping variables, you'll have to first `ungroup()`. (This design is possibly a mistake, but we're stuck with it for now.) ### `arrange()` Grouped `arrange()` is the same as ungrouped `arrange()`, unless you set `.by_group = TRUE`, in which case it will order first by the grouping variables. ```{r} by_species %>% arrange(desc(mass)) %>% relocate(species, mass) by_species %>% arrange(desc(mass), .by_group = TRUE) %>% relocate(species, mass) ``` Note that the second example is sorted by `species` (from the `group_by()` statement) and then by `mass` (within species). ### `mutate()` In simple cases with vectorised functions, grouped and ungrouped `mutate()` give the same results. 
They differ when used with summary functions: ```{r by_homeworld} # Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) ``` Or with window functions like `min_rank()`: ```{r} # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) ``` ### `filter()` A grouped `filter()` effectively does a `mutate()` to generate a logical variable, and then only keeps the rows where the variable is `TRUE`. This means that grouped filters can be used with summary functions. For example, we can find the tallest character of each species: ```{r filter} by_species %>% select(name, species, height) %>% filter(height == max(height)) ``` You can also use `filter()` to remove entire groups. For example, the following code eliminates all groups that only have a single member: ```{r filter_group} by_species %>% filter(n() != 1) %>% tally() ``` ### `slice()` and friends `slice()` and friends (`slice_head()`, `slice_tail()`, `slice_sample()`, `slice_min()` and `slice_max()`) select rows within a group. 
For example, we can select the first observation within each species: ```{r slice} by_species %>% relocate(species) %>% slice(1) ``` Similarly, we can use `slice_min()` to select the smallest `n` values of a variable: ```{r slice_min} by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) ``` dplyr/inst/doc/grouping.R0000644000176200001440000001022414525507102015103 0ustar liggesusers## ----echo = FALSE, message = FALSE, warning = FALSE--------------------------- knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----message = FALSE---------------------------------------------------------- library(dplyr) ## ----------------------------------------------------------------------------- by_species <- starwars %>% group_by(species) by_sex_gender <- starwars %>% group_by(sex, gender) ## ----------------------------------------------------------------------------- by_species by_sex_gender ## ----------------------------------------------------------------------------- by_species %>% tally() by_sex_gender %>% tally(sort = TRUE) ## ----group_by_with_expression------------------------------------------------- bmi_breaks <- c(0, 18.5, 25, 30, Inf) starwars %>% group_by(bmi_cat = cut(mass/(height/100)^2, breaks=bmi_breaks)) %>% tally() ## ----group_vars--------------------------------------------------------------- by_species %>% group_keys() by_sex_gender %>% group_keys() ## ----------------------------------------------------------------------------- by_species %>% group_indices() ## ----------------------------------------------------------------------------- by_species %>% group_rows() %>% head() ## ----------------------------------------------------------------------------- by_species %>% group_vars() by_sex_gender %>% group_vars() ## ----------------------------------------------------------------------------- by_species %>% group_by(homeworld) %>% tally() ## 
----------------------------------------------------------------------------- by_species %>% group_by(homeworld, .add = TRUE) %>% tally() ## ----------------------------------------------------------------------------- by_species %>% ungroup() %>% tally() ## ----------------------------------------------------------------------------- by_sex_gender %>% ungroup(sex) %>% tally() ## ----summarise---------------------------------------------------------------- by_species %>% summarise( n = n(), height = mean(height, na.rm = TRUE) ) ## ----------------------------------------------------------------------------- by_sex_gender %>% summarise(n = n()) %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop_last") %>% group_vars() ## ----------------------------------------------------------------------------- by_sex_gender %>% summarise(n = n(), .groups = "keep") %>% group_vars() by_sex_gender %>% summarise(n = n(), .groups = "drop") %>% group_vars() ## ----select------------------------------------------------------------------- by_species %>% select(mass) ## ----------------------------------------------------------------------------- by_species %>% arrange(desc(mass)) %>% relocate(species, mass) by_species %>% arrange(desc(mass), .by_group = TRUE) %>% relocate(species, mass) ## ----by_homeworld------------------------------------------------------------- # Subtract off global mean starwars %>% select(name, homeworld, mass) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) # Subtract off homeworld mean starwars %>% select(name, homeworld, mass) %>% group_by(homeworld) %>% mutate(standard_mass = mass - mean(mass, na.rm = TRUE)) ## ----------------------------------------------------------------------------- # Overall rank starwars %>% select(name, homeworld, height) %>% mutate(rank = min_rank(height)) # Rank per homeworld starwars %>% select(name, homeworld, height) %>% group_by(homeworld) %>% mutate(rank = min_rank(height)) ## 
----filter------------------------------------------------------------------- by_species %>% select(name, species, height) %>% filter(height == max(height)) ## ----filter_group------------------------------------------------------------- by_species %>% filter(n() != 1) %>% tally() ## ----slice-------------------------------------------------------------------- by_species %>% relocate(species) %>% slice(1) ## ----slice_min---------------------------------------------------------------- by_species %>% filter(!is.na(height)) %>% slice_min(height, n = 2) dplyr/inst/doc/colwise.Rmd0000644000176200001440000003007314366556340015256 0ustar liggesusers--- title: "Column-wise operations" description: > Learn how to easily repeat the same operation across multiple columns using `across()`. output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Column-wise operations} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = T, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) set.seed(1014) ``` It's often useful to perform the same operation on multiple columns, but copying and pasting is both tedious and error prone: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(a = mean(a), b = mean(b), c = mean(c), d = mean(d)) ``` (If you're trying to compute `mean(a, b, c, d)` for each row, instead see `vignette("rowwise")`) This vignette will introduce you to the `across()` function, which lets you rewrite the previous code more succinctly: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise(across(a:d, mean)) ``` We'll start by discussing the basic usage of `across()`, particularly as it applies to `summarise()`, and show how to use it with multiple functions. We'll then show a few uses with other verbs. 
We'll finish off with a bit of history, showing why we prefer `across()` to our last approach (the `_if()`, `_at()` and `_all()` functions) and how to translate your old code to the new syntax. ```{r setup} library(dplyr, warn.conflicts = FALSE) ``` ## Basic usage `across()` has two primary arguments: * The first argument, `.cols`, selects the columns you want to operate on. It uses tidy selection (like `select()`) so you can pick variables by position, name, and type. * The second argument, `.fns`, is a function or list of functions to apply to each column. This can also be a purrr style formula (or list of formulas) like `~ .x / 2`. (This argument is optional, and you can omit it if you just want to get the underlying data; you'll see that technique used in `vignette("rowwise")`.) Here are a couple of examples of `across()` in conjunction with its favourite verb, `summarise()`. But you can use `across()` with any dplyr verb, as you'll see a little later. ```{r} starwars %>% summarise(across(where(is.character), n_distinct)) starwars %>% group_by(species) %>% filter(n() > 1) %>% summarise(across(c(sex, gender, homeworld), n_distinct)) starwars %>% group_by(homeworld) %>% filter(n() > 1) %>% summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) ``` Because `across()` is usually used in combination with `summarise()` and `mutate()`, it doesn't select grouping variables in order to avoid accidentally modifying them: ```{r} df <- data.frame(g = c(1, 1, 2), x = c(-1, 1, 3), y = c(-1, -4, -9)) df %>% group_by(g) %>% summarise(across(where(is.numeric), sum)) ``` ### Multiple functions You can transform each variable with more than one function by supplying a named list of functions or lambda functions in the second argument: ```{r} min_max <- list( min = ~min(.x, na.rm = TRUE), max = ~max(.x, na.rm = TRUE) ) starwars %>% summarise(across(where(is.numeric), min_max)) starwars %>% summarise(across(c(height, mass, birth_year), min_max)) ``` Control how the names 
are created with the `.names` argument which takes a [glue](https://glue.tidyverse.org/) spec: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) starwars %>% summarise(across(c(height, mass, birth_year), min_max, .names = "{.fn}.{.col}")) ``` If you'd prefer all summaries with the same function to be grouped together, you'll have to expand the calls yourself: ```{r} starwars %>% summarise( across(c(height, mass, birth_year), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(c(height, mass, birth_year), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ``` (One day this might become an argument to `across()` but we're not yet sure how it would work.) We cannot however use `where(is.numeric)` in that last case because the second `across()` would pick up the variables that were newly created ("min_height", "min_mass" and "min_birth_year"). We can work around this by combining both calls to `across()` into a single expression that returns a tibble: ```{r} starwars %>% summarise( tibble( across(where(is.numeric), ~min(.x, na.rm = TRUE), .names = "min_{.col}"), across(where(is.numeric), ~max(.x, na.rm = TRUE), .names = "max_{.col}") ) ) ``` Alternatively we could reorganize results with `relocate()`: ```{r} starwars %>% summarise(across(where(is.numeric), min_max, .names = "{.fn}.{.col}")) %>% relocate(starts_with("min")) ``` ### Current column If you need to, you can access the name of the "current" column inside by calling `cur_column()`. 
This can be useful if you want to perform some sort of context dependent transformation that's already encoded in a vector: ```{r} df <- tibble(x = 1:3, y = 3:5, z = 5:7) mult <- list(x = 1, y = 10, z = 100) df %>% mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]])) ``` ### Gotchas Be careful when combining numeric summaries with `where(is.numeric)`: ```{r} df <- data.frame(x = c(1, 2, 3), y = c(1, 4, 9)) df %>% summarise(n = n(), across(where(is.numeric), sd)) ``` Here `n` becomes `NA` because `n` is numeric, so the `across()` computes its standard deviation, and the standard deviation of 3 (a constant) is `NA`. You probably want to compute `n()` last to avoid this problem: ```{r} df %>% summarise(across(where(is.numeric), sd), n = n()) ``` Alternatively, you could explicitly exclude `n` from the columns to operate on: ```{r} df %>% summarise(n = n(), across(where(is.numeric) & !n, sd)) ``` Another approach is to combine both the call to `n()` and `across()` in a single expression that returns a tibble: ```{r} df %>% summarise( tibble(n = n(), across(where(is.numeric), sd)) ) ``` ### Other verbs So far we've focused on the use of `across()` with `summarise()`, but it works with any other dplyr verb that uses data masking: * Rescale all numeric variables to range 0-1: ```{r} rescale01 <- function(x) { rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1]) } df <- tibble(x = 1:4, y = rnorm(4)) df %>% mutate(across(where(is.numeric), rescale01)) ``` For some verbs, like `group_by()`, `count()` and `distinct()`, you don't need to supply a summary function, but it can be useful to use tidy-selection to dynamically select a set of columns. In those cases, we recommend using the complement to `across()`, `pick()`, which works like `across()` but doesn't apply any functions and instead returns a data frame containing the selected columns. 
* Find all distinct ```{r} starwars %>% distinct(pick(contains("color"))) ``` * Count all combinations of variables with a given pattern: ```{r} starwars %>% count(pick(contains("color")), sort = TRUE) ``` `across()` doesn't work with `select()` or `rename()` because they already use tidy select syntax; if you want to transform column names with a function, you can use `rename_with()`. ### filter() We cannot directly use `across()` in `filter()` because we need an extra step to combine the results. To that end, `filter()` has two special purpose companion functions: * `if_any()` keeps the rows where the predicate is true for *at least one* selected column: ```{r} starwars %>% filter(if_any(everything(), ~ !is.na(.x))) ``` * `if_all()` keeps the rows where the predicate is true for *all* selected columns: ```{r} starwars %>% filter(if_all(everything(), ~ !is.na(.x))) ``` ## `_if`, `_at`, `_all` Prior versions of dplyr allowed you to apply a function to multiple columns in a different way: using functions with `_if`, `_at`, and `_all()` suffixes. These functions solved a pressing need and are used by many people, but are now superseded. That means that they'll stay around, but won't receive any new features and will only get critical bug fixes. ### Why do we like `across()`? Why did we decide to move away from these functions in favour of `across()`? 1. `across()` makes it possible to express useful summaries that were previously impossible: ```{r, eval = FALSE} df %>% group_by(g1, g2) %>% summarise( across(where(is.numeric), mean), across(where(is.factor), nlevels), n = n(), ) ``` 1. `across()` reduces the number of functions that dplyr needs to provide. This makes dplyr easier for you to use (because there are fewer functions to remember) and easier for us to implement new verbs (since we only need to implement one function, not four). 1. 
`across()` unifies `_if` and `_at` semantics so that you can select by position, name, and type, and you can now create compound selections that were previously impossible. For example, you can now transform all numeric columns whose name begins with "x": `across(where(is.numeric) & starts_with("x"))`. 1. `across()` doesn't need to use `vars()`. The `_at()` functions are the only place in dplyr where you have to manually quote variable names, which makes them a little weird and hence harder to remember. ### Why did it take so long to discover `across()`? It's disappointing that we didn't discover `across()` earlier, and instead worked through several false starts (first not realising that it was a common problem, then with the `_each()` functions, and most recently with the `_if()`/`_at()`/`_all()` functions). But `across()` couldn't work without three recent discoveries: * You can have a column of a data frame that is itself a data frame. This is something provided by base R, but it's not very well documented, and it took a while to see that it was useful, not just a theoretical curiosity. * We can use data frames to allow summary functions to return multiple columns. * We can use the absence of an outer name as a convention that you want to unpack a data frame column into individual columns. ### How do you convert existing code? Fortunately, it's generally straightforward to translate your existing code to use `across()`: * Strip the `_if()`, `_at()` and `_all()` suffix off the function. * Call `across()`. The first argument will be: 1. For `_if()`, the old second argument wrapped in `where()`. 1. For `_at()`, the old second argument, with the call to `vars()` removed. 1. For `_all()`, `everything()`. The subsequent arguments can be copied as is. 
For example: ```{r, results = FALSE} df %>% mutate_if(is.numeric, ~mean(.x, na.rm = TRUE)) # -> df %>% mutate(across(where(is.numeric), ~mean(.x, na.rm = TRUE))) df %>% mutate_at(vars(c(x, starts_with("y"))), mean) # -> df %>% mutate(across(c(x, starts_with("y")), mean)) df %>% mutate_all(mean) # -> df %>% mutate(across(everything(), mean)) ``` There are a few exceptions to this rule: * `rename_*()` and `select_*()` follow a different pattern. They already have select semantics, so are generally used in a different way that doesn't have a direct equivalent with `across()`; use the new `rename_with()` instead. * Previously, `filter_*()` were paired with the `all_vars()` and `any_vars()` helpers. The new helpers `if_any()` and `if_all()` can be used inside `filter()` to keep rows for which the predicate is true for at least one, or all selected columns: ```{r} df <- tibble(x = c("a", "b"), y = c(1, 1), z = c(-1, 1)) # Find all rows where EVERY numeric variable is greater than zero df %>% filter(if_all(where(is.numeric), ~ .x > 0)) # Find all rows where ANY numeric variable is greater than zero df %>% filter(if_any(where(is.numeric), ~ .x > 0)) ``` * When used in a `mutate()`, all transformations performed by an `across()` are applied at once. This is different to the behaviour of `mutate_if()`, `mutate_at()`, and `mutate_all()`, which apply the transformations one at a time. We expect that you'll generally find the new behaviour less surprising: ```{r} df <- tibble(x = 2, y = 4, z = 8) df %>% mutate_all(~ .x / y) df %>% mutate(across(everything(), ~ .x / y)) ```